model: add Glm4MoeLiteForCausalLM architecture to support GLM-4.7-Flash (#13779)

This commit is contained in:
Jeffrey Morgan
2026-01-19 12:47:17 -08:00
committed by GitHub
parent 03bf241c33
commit 4f138a1749
17 changed files with 2577 additions and 1 deletions

View File

@@ -269,6 +269,7 @@ func (kv KV) OllamaEngineRequired() bool {
"qwen25vl",
"qwen3", "qwen3moe",
"qwen3vl", "qwen3vlmoe",
"glm4moelite",
}, kv.Architecture())
}
@@ -856,6 +857,7 @@ func (f GGML) FlashAttention() bool {
return slices.Contains([]string{
"bert",
"gemma3",
"glm4moelite",
"gptoss", "gpt-oss",
"mistral3",
"olmo3",