model: add Glm4MoeLiteForCausalLM architecture to support GLM-4.7-Flash (#13779)

2026-03-27 02:58:43 +07:00 · 2026-01-19 12:47:17 -08:00
parent 03bf241c33
commit 4f138a1749
17 changed files with 2577 additions and 1 deletions
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -269,6 +269,7 @@ func (kv KV) OllamaEngineRequired() bool {
 		"qwen25vl",
 		"qwen3", "qwen3moe",
 		"qwen3vl", "qwen3vlmoe",
+		"glm4moelite",
 	}, kv.Architecture())
 }

@@ -856,6 +857,7 @@ func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"bert",
 		"gemma3",
+		"glm4moelite",
 		"gptoss", "gpt-oss",
 		"mistral3",
 		"olmo3",