@@ -420,6 +420,9 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]
         for filename in os.listdir(dir_model):
             if filename.startswith(prefix) and filename.endswith(suffix):
                 part_names.append(filename)
+            # TODO remove later
+            elif filename.endswith(suffix):
+                part_names.append(filename)

         part_names.sort()

@@ -607,13 +610,14 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         toktypes: list[int] = []

         from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
-        vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
-        assert max(tokenizer.vocab.values()) < vocab_size
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        vocab = getattr(tokenizer, 'vocab', tokenizer.get_vocab())
+        vocab_size = self.hparams.get("vocab_size", len(vocab))
+        assert max(vocab.values()) < vocab_size

         tokpre = self.get_vocab_base_pre(tokenizer)

-        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab.items()}
         added_vocab = tokenizer.get_added_vocab()

         added_tokens_decoder = tokenizer.added_tokens_decoder
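The getattr fallback above is useful because not every tokenizer class exposes a .vocab attribute, while get_vocab() is the generic Hugging Face accessor. A minimal standalone sketch of the same pattern, assuming a placeholder model path:

    from transformers import AutoTokenizer

    # Placeholder checkpoint directory; trust_remote_code lets custom tokenizer classes load.
    tokenizer = AutoTokenizer.from_pretrained("path/to/model", trust_remote_code=True)

    # Prefer the .vocab attribute when it exists, otherwise fall back to get_vocab().
    vocab = getattr(tokenizer, "vocab", tokenizer.get_vocab())
    reverse_vocab = {token_id: token for token, token_id in vocab.items()}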
@@ -1218,8 +1222,12 @@ def __init__(self, *args, **kwargs):
         self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, self.block_count)

         # load preprocessor config
-        with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
-            self.preprocessor_config = json.load(f)
+        preprocess_config_file = self.dir_model / "preprocessor_config.json"
+        if preprocess_config_file.exists():
+            with open(preprocess_config_file, "r", encoding="utf-8") as f:
+                self.preprocessor_config = json.load(f)
+        else:
+            self.preprocessor_config = dict(image_mean=[0.485, 0.456, 0.406], image_std=[0.229, 0.224, 0.225])

     def get_vision_config(self) -> dict[str, Any] | None:
         return self.global_config.get("vision_config")
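The fallback values in this hunk are the standard ImageNet channel statistics (mean 0.485/0.456/0.406, std 0.229/0.224/0.225), a common default when a checkpoint ships without a preprocessor_config.json. A rough sketch of how such a config is typically applied; the normalize helper below is illustrative only, not part of the change:

    import numpy as np

    # Standard ImageNet normalization statistics (RGB order), matching the fallback above.
    preprocessor_config = dict(image_mean=[0.485, 0.456, 0.406], image_std=[0.229, 0.224, 0.225])

    def normalize(image: np.ndarray) -> np.ndarray:
        # image: float array of shape (H, W, 3) scaled to [0, 1]
        mean = np.asarray(preprocessor_config["image_mean"])
        std = np.asarray(preprocessor_config["image_std"])
        return (image - mean) / std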
@@ -3115,6 +3123,10 @@ def set_gguf_parameters(self):

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # process the experts separately
+        name = name.replace("language_model.", "")  # InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
         if name.find("experts") != -1:
             n_experts = self.hparams["num_experts"]
             assert bid is not None
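Viewed in isolation, the InternVL handling added here strips the language_model. prefix and skips the vision tensors; the helper below is a hypothetical restatement of that logic, not code from the change:

    def remap_internvl_tensor_name(name: str) -> str | None:
        # InternVL checkpoints prefix LLM weights with "language_model."; drop it so the
        # usual tensor-name mapping applies.
        name = name.replace("language_model.", "")
        # Skip visual tensors (names starting with "mlp" or "vision_model").
        if name.startswith(("mlp", "vision_model")):
            return None
        return name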