Skip to content

Commit 9d6ea41

Browse files
committed
feat: support GLM 4.5 family of models
1 parent 2927b5e commit 9d6ea41

File tree

1 file changed

+41
-13
lines changed

1 file changed

+41
-13
lines changed

convert_hf_to_gguf.py

Lines changed: 41 additions & 13 deletions
Original file line number · Diff line number · Diff line change
@@ -6647,12 +6647,6 @@ def set_gguf_parameters(self):
66476647
def modify_tensors(
66486648
self, data_torch: Tensor, name: str, bid: int | None
66496649
) -> Iterable[tuple[str, Tensor]]:
6650-
# Handle layer 46 tensors - preserve all for future MTP support
6651-
if bid is not None and bid == 46:
6652-
# Convert layer 46 tensors to GGUF naming but don't try to map them
6653-
new_name = name.replace("model.layers.", "blk.")
6654-
return [(new_name, data_torch)]
6655-
66566650
if name.startswith("model.visual."): # ignore visual part
66576651
return []
66586652
elif name.startswith("model.language_model."):
@@ -6662,14 +6656,18 @@ def modify_tensors(
66626656
if name == "model.embed_tokens.weight":
66636657
return [(self.map_tensor_name("token_embd.weight"), data_torch)]
66646658

6665-
# Handle routed experts (skip for NextN layer 46)
6666-
if name.find("mlp.experts") != -1 and "shared_experts" not in name and bid != 46:
6659+
# Handle routed experts
6660+
if name.find("mlp.experts") != -1 and "shared_experts" not in name:
66676661
n_experts = self.hparams["n_routed_experts"]
66686662
assert bid is not None
66696663

66706664
if self._experts is None:
66716665
self._experts = [{} for _ in range(self.block_count)]
66726666

6667+
# Extend experts array if needed (for models where actual layers > num_hidden_layers)
6668+
while len(self._experts) <= bid:
6669+
self._experts.append({})
6670+
66736671
self._experts[bid][name] = data_torch
66746672

66756673
if len(self._experts[bid]) >= n_experts * 3:
@@ -6705,11 +6703,22 @@ def modify_tensors(
67056703
new_name = name.replace("model.layers.", "blk.").replace(
67066704
".mlp.gate.e_score_correction_bias", ".ffn_gate_inp.bias"
67076705
)
6708-
return [(self.map_tensor_name(new_name), data_torch)]
6706+
return [(new_name, data_torch)]
6707+
elif ".mlp.gate.weight" in name:
6708+
new_name = name.replace("model.layers.", "blk.").replace(
6709+
".mlp.gate.weight", ".ffn_gate_inp.weight"
6710+
)
6711+
return [(new_name, data_torch)]
67096712

67106713
# Handle shared expert tensors
6711-
if ".mlp.ffn_" in name and "_shexp" in name:
6712-
new_name = name.replace("model.layers.", "blk.")
6714+
if ".mlp.shared_experts." in name:
6715+
new_name = name.replace("model.layers.", "blk.").replace(".mlp.shared_experts.", ".ffn_")
6716+
if "gate_proj" in new_name:
6717+
new_name = new_name.replace("gate_proj", "gate_shexp")
6718+
elif "down_proj" in new_name:
6719+
new_name = new_name.replace("down_proj", "down_shexp")
6720+
elif "up_proj" in new_name:
6721+
new_name = new_name.replace("up_proj", "up_shexp")
67136722
return [(new_name, data_torch)]
67146723

67156724
# Handle regular dense FFN layers (for hybrid dense/MoE architecture)
@@ -6738,8 +6747,27 @@ def modify_tensors(
67386747
or ".enorm." in name
67396748
or ".hnorm." in name
67406749
):
6741-
# For NextN tensors, convert to GGUF naming convention
6742-
new_name = name.replace("model.layers.", "blk.").replace("model.", "")
6750+
new_name = name.replace("model.layers.", "blk.").replace("model.", "").replace(".weight", "")
6751+
return [(new_name, data_torch)]
6752+
6753+
# GLM tensor mapping - handle directly without map_tensor_name
6754+
if ".input_layernorm." in name:
6755+
new_name = name.replace("model.layers.", "blk.").replace(".input_layernorm.", ".attn_norm.")
6756+
return [(new_name, data_torch)]
6757+
elif ".post_attention_layernorm." in name:
6758+
new_name = name.replace("model.layers.", "blk.").replace(".post_attention_layernorm.", ".ffn_norm.")
6759+
return [(new_name, data_torch)]
6760+
elif ".self_attn." in name:
6761+
# Map GLM self_attn to standard attention naming
6762+
new_name = name.replace("model.layers.", "blk.").replace(".self_attn.", ".attn_")
6763+
if "q_proj" in new_name:
6764+
new_name = new_name.replace("q_proj", "q")
6765+
elif "k_proj" in new_name:
6766+
new_name = new_name.replace("k_proj", "k")
6767+
elif "v_proj" in new_name:
6768+
new_name = new_name.replace("v_proj", "v")
6769+
elif "o_proj" in new_name:
6770+
new_name = new_name.replace("o_proj", "output")
67436771
return [(new_name, data_torch)]
67446772

67456773
return super().modify_tensors(data_torch, name, bid)

0 commit comments

Comments (0)