- import fs from 'fs'
+ import fs from 'fs/promises'
+ import { constants } from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
import * as ort from 'onnxruntime-web'
@@ -10,18 +11,14 @@ const MODEL_DIR = path.resolve(__dirname, '..', 'models')
const FILES = ['onnx/model.onnx', 'tokenizer.json', 'tokenizer_config.json']

- function saveFile(buffer, outputPath) {
-   return new Promise((resolve, reject) => {
-     fs.writeFile(outputPath, Buffer.from(buffer), err => {
-       if (err) reject(err)
-       else resolve()
-     })
-   })
+ async function saveFile(buffer, outputPath) {
+   await fs.writeFile(outputPath, Buffer.from(buffer))
}

- function fileExists(filePath) {
+ async function fileExists(filePath) {
  try {
-     return fs.existsSync(filePath)
+     await fs.access(filePath, constants.F_OK)
+     return true
  } catch {
    return false
  }
@@ -44,13 +41,15 @@ async function downloadFile(url, outputPath) {
}

async function downloadModelIfNeeded() {
-   if (!fs.existsSync(MODEL_DIR)) {
-     fs.mkdirSync(MODEL_DIR, { recursive: true })
+   try {
+     await fs.access(MODEL_DIR)
+   } catch {
+     await fs.mkdir(MODEL_DIR, { recursive: true })
  }

  for (const file of FILES) {
    const filePath = path.join(MODEL_DIR, path.basename(file))
-     if (!fileExists(filePath)) {
+     if (!(await fileExists(filePath))) {
      const url = `https://huggingface.co/${MODEL_NAME}/resolve/main/${file}`
      await downloadFile(url, filePath)
    }
@@ -65,12 +64,8 @@ async function forceRedownloadModel() {
  // Delete all model files to force re-download
  for (const file of FILES) {
    const filePath = path.join(MODEL_DIR, path.basename(file))
-     try {
-       if (fileExists(filePath)) {
-         fs.unlinkSync(filePath)
-       }
-     } catch {
-       // Ignore deletion errors, we'll overwrite anyway
+     if (await fileExists(filePath)) {
+       await fs.unlink(filePath).catch(() => {})
    }
  }

@@ -84,11 +79,11 @@ async function initializeModelAndVocab() {

  const loadModelAndVocab = async () => {
    // Load model as buffer for onnxruntime-web
-     const modelBuffer = fs.readFileSync(modelPath)
+     const modelBuffer = await fs.readFile(modelPath)
    session = await ort.InferenceSession.create(modelBuffer)

    // Try to parse tokenizer JSON
-     const tokenizerJson = JSON.parse(fs.readFileSync(vocabPath, 'utf-8'))
+     const tokenizerJson = JSON.parse(await fs.readFile(vocabPath, 'utf-8'))

    // Validate tokenizer structure
    if (!tokenizerJson.model || !tokenizerJson.model.vocab) {
@@ -112,7 +107,5 @@ async function initializeModelAndVocab() {
  } catch (error) {
    // Model or tokenizer is corrupted, force re-download
-     // eslint-disable-next-line no-console
-     console.warn('Model corruption detected, re-downloading...', error.message)
    await forceRedownloadModel()

    // Retry initialization after re-download
@@ -347,15 +341,13 @@ async function processChunkedEmbeddings(chunks, session) {
  const validIds = ids.filter(id => {
    const isValid = typeof id === 'number' && !isNaN(id) && isFinite(id)
    if (!isValid) {
-       // eslint-disable-next-line no-console
-       console.warn(`Invalid token ID detected: ${id} (type: ${typeof id})`)
+       throw new Error(`Invalid token ID detected: ${id} (type: ${typeof id})`)
    }
    return isValid
  })

  if (validIds.length !== ids.length) {
-     // eslint-disable-next-line no-console
-     console.warn(`Filtered out ${ids.length - validIds.length} invalid token IDs`)
+     throw new Error(`Found ${ids.length - validIds.length} invalid token IDs`)
  }

  const inputIds = new BigInt64Array(validIds.map(i => BigInt(i)))
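
Review note: `fs.mkdir` with `{ recursive: true }` already resolves silently when the target directory exists (Node >= 10.12), so the `access()` probe in `downloadModelIfNeeded` could be dropped; likewise, `fs.unlink(...).catch(() => {})` already tolerates a missing file, making the `fileExists` guard in `forceRedownloadModel` optional. A possible simplification, not part of this diff:

```js
// recursive mkdir is idempotent: it does not reject with EEXIST,
// so no existence check is needed beforehand
await fs.mkdir(MODEL_DIR, { recursive: true })

// unlink with a swallowed rejection tolerates ENOENT on its own,
// so the separate fileExists() probe (and its race window) can go
await fs.unlink(filePath).catch(() => {})
```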
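Review note: with every helper now async, errors that `existsSync`/`readFileSync` used to throw synchronously surface as promise rejections instead, so callers must await the chain. A minimal sketch of wiring this up from an ESM entry point (the `./model.js` path and named exports are hypothetical; top-level await needs Node >= 14.8):

```js
// hypothetical entry point; assumes the module exports these helpers
import { downloadModelIfNeeded, initializeModelAndVocab } from './model.js'

try {
  await downloadModelIfNeeded()   // fetch any missing model files first
  await initializeModelAndVocab() // build the ORT session, parse the tokenizer
} catch (err) {
  // fs/promises failures arrive here as rejections, not thrown exceptions
  console.error('Model initialization failed:', err)
  process.exitCode = 1
}
```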