@@ -607,7 +607,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
607
607
toktypes : list [int ] = []
608
608
609
609
from transformers import AutoTokenizer
610
- tokenizer = AutoTokenizer .from_pretrained (self .dir_model , trust_remote_code = True )
610
+ tokenizer = AutoTokenizer .from_pretrained (self .dir_model )
611
611
vocab = getattr (tokenizer , 'vocab' , tokenizer .get_vocab ())
612
612
vocab_size = self .hparams .get ("vocab_size" , len (vocab ))
613
613
assert max (vocab .values ()) < vocab_size
@@ -1219,12 +1219,8 @@ def __init__(self, *args, **kwargs):
1219
1219
self .tensor_map = gguf .get_tensor_name_map (gguf .MODEL_ARCH .MMPROJ , self .block_count )
1220
1220
1221
1221
# load preprocessor config
1222
- preprocess_config_file = self .dir_model / "preprocessor_config.json"
1223
- if preprocess_config_file .exists ():
1224
- with open (preprocess_config_file , "r" , encoding = "utf-8" ) as f :
1225
- self .preprocessor_config = json .load (f )
1226
- else :
1227
- self .preprocessor_config = dict (image_mean = [0.485 , 0.456 , 0.406 ], image_std = [0.229 , 0.224 , 0.225 ])
1222
+ with open (self .dir_model / "preprocessor_config.json" , "r" , encoding = "utf-8" ) as f :
1223
+ self .preprocessor_config = json .load (f )
1228
1224
1229
1225
def get_vision_config (self ) -> dict [str , Any ] | None :
1230
1226
return self .global_config .get ("vision_config" )
@@ -3160,7 +3156,7 @@ def set_gguf_parameters(self):
3160
3156
3161
3157
def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
3162
3158
# process the experts separately
3163
- name = name .replace (r "language_model." , r "" ) # InternVL
3159
+ name = name .replace ("language_model." , "" ) # InternVL
3164
3160
if name .startswith ("mlp" ) or name .startswith ("vision_model" ) or name .startswith ("model.vision_tower" ) or name .startswith ("model.multi_modal_projector" ):
3165
3161
# skip visual tensors
3166
3162
return []
@@ -3217,9 +3213,14 @@ class Qwen3Model(Qwen2Model):
3217
3213
class Qwen3MoeModel (Qwen2MoeModel ):
3218
3214
model_arch = gguf .MODEL_ARCH .QWEN3MOE
3219
3215
3216
def __init__(self, *args, **kwargs):
    """Initialise the converter and remember the checkpoint's original HF architecture.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. Afterwards the hyper-parameters are re-read from
    ``self.dir_model`` via ``ModelBase.load_hparams`` and the first entry of
    their 'architectures' list is stored in ``self.origin_hf_arch``.

    ``self.origin_hf_arch`` is ``None`` when 'architectures' is missing,
    ``null`` or an empty list, so a config without that information no
    longer raises IndexError/TypeError during construction.
    """
    super().__init__(*args, **kwargs)
    hparams = ModelBase.load_hparams(self.dir_model)
    # `or [None]` covers a missing key as well as an explicit null / empty
    # list; indexing the raw `.get(..., [None])` result would crash on the
    # latter two.
    architectures = hparams.get('architectures') or [None]
    self.origin_hf_arch = architectures[0]
3220
+
3220
3221
def set_vocab (self ):
3221
- # deal with interns1
3222
- if 'interns1' in f' { self . dir_model } ' . lower () :
3222
+ # deal with intern-s1
3223
+ if self . origin_hf_arch == 'InternS1ForConditionalGeneration' :
3223
3224
self ._set_vocab_interns1 ()
3224
3225
return
3225
3226
@@ -3240,19 +3241,20 @@ def _set_vocab_interns1(self):
3240
3241
additional_special_tokens = []
3241
3242
if special_tokens_map_file .is_file ():
3242
3243
with open (special_tokens_map_file , encoding = 'utf-8' ) as f :
3243
- additional_special_tokens = json .load (f ).get ('additional_special_tokens' , [])
3244
+ additional_special_tokens = json .load (f ).get ('additional_special_tokens' , [])
3244
3245
tokenizer_cfg_file = self .dir_model / 'special_tokens_map.json'
3245
3246
if tokenizer_cfg_file .is_file ():
3246
3247
with open (tokenizer_cfg_file , encoding = 'utf-8' ) as f :
3247
- added_tokens_decoder = json .load (f ).get ('added_tokens_decoder' , {})
3248
- token2ids_map = {data ['content' ] : int (token ) for token , data in added_tokens_decoder .items () if data ['special' ]}
3249
- for token in additional_special_tokens :
3250
- if token in token2ids_map :
3251
- special_vocab ._set_special_token (token , token2ids_map [token ])
3248
+ added_tokens_decoder = json .load (f ).get ('added_tokens_decoder' , {})
3249
+ token2ids_map = {data ['content' ] : int (token ) for token , data in added_tokens_decoder .items () if data ['special' ]}
3250
+ for token in additional_special_tokens :
3251
+ if token in token2ids_map :
3252
+ special_vocab ._set_special_token (token , token2ids_map [token ])
3252
3253
special_vocab ._set_special_token ('eos' , 151645 )
3253
3254
special_vocab ._set_special_token ("bos" , 151643 )
3254
3255
special_vocab .add_to_gguf (self .gguf_writer )
3255
3256
3257
+
3256
3258
@ModelBase .register ("GPT2LMHeadModel" )
3257
3259
class GPT2Model (TextModel ):
3258
3260
model_arch = gguf .MODEL_ARCH .GPT2
0 commit comments