Commit fdf3da0

feat: support GLM 4.5 family of models
1 parent 4397ccb commit fdf3da0

2 files changed: +9 −5 lines changed


src/llama-model.cpp

Lines changed: 7 additions & 5 deletions
@@ -111,6 +111,8 @@ const char * llm_type_name(llm_type type) {
     case LLM_TYPE_30B_A3B: return "30B.A3B";
     case LLM_TYPE_235B_A22B: return "235B.A22B";
     case LLM_TYPE_300B_A47B: return "300B.A47B";
+    case LLM_TYPE_9B_A2B: return "9B.A2B";
+    case LLM_TYPE_32B_A7B: return "32B.A7B";
     case LLM_TYPE_E2B: return "E2B";
     case LLM_TYPE_E4B: return "E4B";
     default: return "?B";
@@ -1435,8 +1437,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             }

             switch (hparams.n_layer) {
-                case 46: type = LLM_TYPE_12B; break; // GLM-4.5-Air
-                case 93: type = LLM_TYPE_32B; break; // GLM-4.5
+                case 46: type = LLM_TYPE_9B_A2B; break; // GLM-4.5-Air
+                case 93: type = LLM_TYPE_32B_A7B; break; // GLM-4.5
                 default: type = LLM_TYPE_UNKNOWN;
             }
         } break;
@@ -4393,9 +4395,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, 0);
     layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, 0);
     layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, 0);
-    layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd }, TENSOR_NOT_REQUIRED);
-    layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_gqa }, TENSOR_NOT_REQUIRED);
-    layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_gqa }, TENSOR_NOT_REQUIRED);
+    layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, TENSOR_NOT_REQUIRED);
+    layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED);
+    layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED);

     layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd, n_embd }, 0);
     layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), { n_embd }, 0);

src/llama-model.h

Lines changed: 2 additions & 0 deletions
@@ -103,6 +103,8 @@ enum llm_type {
     LLM_TYPE_30B_A3B,
     LLM_TYPE_235B_A22B,
     LLM_TYPE_300B_A47B, // Ernie MoE big
+    LLM_TYPE_9B_A2B,    // GLM-4.5-Air (9B total, ~2B active)
+    LLM_TYPE_32B_A7B,   // GLM-4.5 (32B total, ~7B active)
     LLM_TYPE_E2B,
     LLM_TYPE_E4B,
 };
