Skip to content

Commit 0297d05

Browse files
zRzRzRzRzRzRzR and x22x22
authored and committed
self.gate dtype update for GLM-4.5 (vllm-project#22203)
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com> Signed-off-by: x22x22 <wadeking@qq.com>
1 parent 8d55ca3 commit 0297d05

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

tests/models/registry.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -383,7 +383,7 @@ def check_available_online(
383383
trust_remote_code=True,
384384
hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501
385385
"Glm4vForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.1V-9B-Thinking"), # noqa: E501
386-
"Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
386+
"Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V",
387387
is_available_online=False), # noqa: E501
388388
"H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
389389
trust_remote_code=True,

vllm/model_executor/models/glm4_moe.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@ def __init__(
123123
config.n_routed_experts,
124124
bias=False,
125125
quant_config=None,
126+
params_dtype=torch.float32,
126127
prefix=f"{prefix}.gate")
127128

128129
self.gate.e_score_correction_bias = nn.Parameter(
@@ -180,7 +181,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
180181

181182
if self.n_shared_experts is not None:
182183
shared_output = self.shared_experts(hidden_states)
183-
router_logits, _ = self.gate(hidden_states)
184+
router_logits, _ = self.gate(hidden_states.to(dtype=torch.float32))
184185
final_hidden_states = self.experts(
185186
hidden_states=hidden_states,
186187
router_logits=router_logits) * self.routed_scaling_factor

0 commit comments

Comments (0)