@@ -1054,11 +1054,13 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_EMBD_CLS: (
             "vision_tower.vision_model.embeddings.class_embedding",
+            "model.vision_tower.embeddings.cls_token",  # Intern-S1
             "vision_model.class_embedding",  # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_EMBD_PATCH: (
             "vision_tower.vision_model.embeddings.patch_embedding",
+            "model.vision_tower.embeddings.patch_embeddings.projection",  # Intern-S1
             "vpm.embeddings.patch_embedding",
             "model.vision_model.embeddings.patch_embedding",  # SmolVLM
             "vision_tower.patch_conv",  # pixtral
@@ -1068,13 +1070,15 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_EMBD_POS: (
             "vision_tower.vision_model.embeddings.position_embedding",
+            "model.vision_tower.embeddings.position_embeddings",  # Intern-S1
             "vpm.embeddings.position_embedding",
             "model.vision_model.embeddings.position_embedding",  # SmolVLM
             "vision_model.positional_embedding_vlm",  # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_Q: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.q_proj",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.q_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.q_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.q_proj",  # llama4
@@ -1084,10 +1088,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.q_norm",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_K: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.k_proj",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.k_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.k_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.k_proj",  # llama4
@@ -1097,10 +1103,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_ATTN_K_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.k_norm",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_V: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.v_proj",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.v_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.v_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.v_proj",  # llama4
@@ -1111,6 +1119,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_INPUT_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
             "vision_tower.vision_model.encoder.layers.{bid}.norm1",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.layernorm_before",  # Intern-S1
             "vpm.encoder.layers.{bid}.layer_norm1",
             "model.vision_model.encoder.layers.{bid}.layer_norm1",  # SmolVLM
             "vision_tower.transformer.layers.{bid}.attention_norm",  # pixtral
@@ -1121,6 +1130,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_ATTN_O: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
             "vision_tower.vision_model.encoder.layers.{bid}.attn.proj",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.projection_layer",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.out_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.out_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.o_proj",  # llama4
@@ -1131,6 +1141,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
             "vision_tower.vision_model.encoder.layers.{bid}.norm2",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.layernorm_after",  # Intern-S1
             "vpm.encoder.layers.{bid}.layer_norm2",
             "model.vision_model.encoder.layers.{bid}.layer_norm2",  # SmolVLM
             "vision_model.model.layers.{bid}.post_attention_layernorm",  # llama4
@@ -1140,6 +1151,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_FFN_UP: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
+            "model.vision_tower.encoder.layer.{bid}.mlp.fc1",  # Intern-S1
             "vpm.encoder.layers.{bid}.mlp.fc1",
             "model.vision_model.encoder.layers.{bid}.mlp.fc1",  # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.up_proj",  # pixtral
@@ -1155,6 +1167,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_FFN_DOWN: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
+            "model.vision_tower.encoder.layer.{bid}.mlp.fc2",  # Intern-S1
             "vpm.encoder.layers.{bid}.mlp.fc2",
             "model.vision_model.encoder.layers.{bid}.mlp.fc2",  # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.down_proj",  # pixtral
@@ -1165,10 +1178,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_LAYER_SCALE_1: (
             "vision_tower.vision_model.encoder.layers.{bid}.ls1",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.lambda_1",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_LAYER_SCALE_2: (
             "vision_tower.vision_model.encoder.layers.{bid}.ls2",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.lambda_2",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_PRE_NORM: (
@@ -1190,6 +1205,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_MM_INP_NORM: (
             "multi_modal_projector.norm",
+            "model.multi_modal_projector.layer_norm",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_MM_SOFT_EMB_NORM: (
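Note on how these rows are consumed: each tuple maps known checkpoint tensor names onto one GGUF tensor type, and `{bid}` is expanded to every block index when the map is built, so a single Intern-S1 row covers all encoder layers. Below is a minimal sketch of the lookup during HF-to-GGUF conversion, assuming gguf-py is importable; `MODEL_ARCH.MMPROJ` and the block count of 24 are illustrative assumptions, not values taken from this diff.

```python
# Minimal sketch: resolve an Intern-S1 checkpoint name to its GGUF name.
# Assumptions: gguf-py is on the path; MODEL_ARCH.MMPROJ and n_blocks=24
# are illustrative, not taken from this diff.
import gguf

tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, 24)

# Layer 3's query projection matches the new Intern-S1 row for
# MODEL_TENSOR.V_ENC_ATTN_Q ("model.vision_tower.encoder.layer.{bid}.attention.q_proj").
hf_name = "model.vision_tower.encoder.layer.3.attention.q_proj.weight"
gguf_name = tensor_map.get_name(hf_name, try_suffixes=(".weight", ".bias"))
print(gguf_name)  # e.g. "v.blk.3.attn_q.weight", per the arch's GGUF naming
```

`get_name` returns `None` for names no row matches, which the conversion scripts treat as an unmapped-tensor error, so missing entries here surface immediately when converting an Intern-S1 checkpoint.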