@@ -6578,6 +6578,181 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
        return super().modify_tensors(data_torch, name, bid)


+@ModelBase.register("Glm4MoeForCausalLM")
+class Glm4MoeModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.dir_model, trust_remote_code=True
+        )
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab._set_special_token(
+            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token("eog", tokenizer.get_added_vocab()["<|user|>"])
+        # NOTE: _set_special_token() keeps a single id per token type, so this
+        # second "eog" assignment overwrites the "<|user|>" one set just above.
+        special_vocab._set_special_token("eog", tokenizer.get_added_vocab()["<|observation|>"])
+        special_vocab._set_special_token(
+            "unk", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token(
+            "bos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab.add_to_gguf(self.gguf_writer)
+
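+    # Illustration (assumed, model-dependent ids): tokenizer.get_added_vocab()
+    # returns a dict mapping added-token strings to integer ids, e.g.
+    # {"<|endoftext|>": 151329, ...} for GLM-4-style vocabularies, so the
+    # _set_special_token() calls in set_vocab() store token ids, not strings.
+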
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = (
+                self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+            )
+        self.gguf_writer.add_rope_dimension_count(
+            int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))
+        )
+
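+        # Worked example (hypothetical hparams): with hidden_size=4096 and
+        # num_attention_heads=32, rope_dim = 4096 // 32 = 128; with the
+        # default partial_rotary_factor of 0.5, the rotary dimension count
+        # written to the GGUF is int(128 * 0.5) = 64.
+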
+        # MoE parameters
+        if (n_experts := self.hparams.get("n_routed_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        # Note: expert_used_count is already set by the parent class using num_experts_per_tok
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+        if (n_shared_experts := self.hparams.get("n_shared_experts")) is not None:
+            self.gguf_writer.add_expert_shared_count(n_shared_experts)
+        if (first_k_dense_replace := self.hparams.get("first_k_dense_replace")) is not None:
+            self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
+
+        # Expert gating function (sigmoid for GLM4_MOE)
+        self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+
+        # Routed scaling factor
+        if (routed_scaling_factor := self.hparams.get("routed_scaling_factor")) is not None:
+            self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
+
+        # Normalise top-k probabilities
+        if (norm_topk_prob := self.hparams.get("norm_topk_prob")) is not None:
+            self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
+
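+    # Example config.json excerpt (hypothetical values) and the writer call
+    # above that each field feeds:
+    #   "n_routed_experts": 128       -> add_expert_count
+    #   "moe_intermediate_size": 1408 -> add_expert_feed_forward_length
+    #   "n_shared_experts": 1         -> add_expert_shared_count
+    #   "first_k_dense_replace": 1    -> add_leading_dense_block_count
+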
+    _experts: list[dict[str, Tensor]] | None = None
+    _shared_experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(
+        self, data_torch: Tensor, name: str, bid: int | None
+    ) -> Iterable[tuple[str, Tensor]]:
+        # Handle layer 46 tensors - preserve all for future MTP support
+        if bid is not None and bid == 46:
+            # Convert layer 46 tensors to GGUF naming but don't try to map them
+            new_name = name.replace("model.layers.", "blk.")
+            return [(new_name, data_torch)]
+
+        if name.startswith("model.visual."):  # ignore visual part
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for multimodal variants
+
+        # Handle main token embedding
+        if name == "model.embed_tokens.weight":
+            return [(self.map_tensor_name("token_embd.weight"), data_torch)]
+
+        # Handle routed experts (skip for NextN layer 46)
+        if name.find("mlp.experts") != -1 and "shared_experts" not in name and bid != 46:
+            n_experts = self.hparams["n_routed_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+                    # Generate GGUF tensor names for the merged experts
+                    if w_name == "down_proj":
+                        new_name = f"blk.{bid}.ffn_down_exps.weight"
+                    elif w_name == "gate_proj":
+                        new_name = f"blk.{bid}.ffn_gate_exps.weight"
+                    elif w_name == "up_proj":
+                        new_name = f"blk.{bid}.ffn_up_exps.weight"
+                    else:
+                        merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+                        new_name = self.map_tensor_name(merged_name)
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
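+        # Shape sketch (hypothetical sizes): per-expert matrices of shape
+        # (n_ff_exp, n_embd) are stacked into one 3D tensor per projection:
+        #   >>> datas = [torch.zeros(1408, 4096) for _ in range(16)]
+        #   >>> torch.stack(datas, dim=0).shape
+        #   torch.Size([16, 1408, 4096])
+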
+        # Handle expert gating input (routing gate)
+        if ".mlp.gate.e_score_correction_bias" in name:
+            new_name = name.replace("model.layers.", "blk.").replace(
+                ".mlp.gate.e_score_correction_bias", ".ffn_gate_inp.bias"
+            )
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Handle shared expert tensors
+        if ".mlp.ffn_" in name and "_shexp" in name:
+            new_name = name.replace("model.layers.", "blk.")
+            return [(new_name, data_torch)]
+
+        # Handle regular dense FFN layers (for the hybrid dense/MoE architecture)
+        if ".mlp." in name and "experts" not in name and "_shexp" not in name:
+            if "gate_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.gate_proj.weight", ".ffn_gate.weight"
+                )
+            elif "up_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.up_proj.weight", ".ffn_up.weight"
+                )
+            elif "down_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.down_proj.weight", ".ffn_down.weight"
+                )
+            else:
+                new_name = name
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Handle special NextN tensors - preserve for future MTP support
+        if (
+            ".embed_tokens." in name
+            or ".shared_head." in name
+            or ".eh_proj." in name
+            or ".enorm." in name
+            or ".hnorm." in name
+        ):
+            # For NextN tensors, convert to GGUF naming convention
+            new_name = name.replace("model.layers.", "blk.").replace("model.", "")
+            return [(new_name, data_torch)]
+
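+        # Example mapping (illustrative): "model.layers.46.eh_proj.weight"
+        # becomes "blk.46.eh_proj.weight", so the NextN (multi-token
+        # prediction) tensors survive conversion even though no current
+        # graph consumes them.
+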
+        return super().modify_tensors(data_torch, name, bid)
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
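+# Usage sketch (hypothetical paths; assumes the standard convert_hf_to_gguf.py
+# CLI, which selects this class via the "Glm4MoeForCausalLM" entry in the
+# model's config.json "architectures" list):
+#
+#   python convert_hf_to_gguf.py /path/to/glm-4-moe --outfile glm4-moe.gguf
+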
@ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
class ChatGLMModel(TextModel):
    model_arch = gguf.MODEL_ARCH.CHATGLM