diff --git a/model/parsers/glm46.go b/model/parsers/glm46.go index fb6ea888e..87d5c8948 100644 --- a/model/parsers/glm46.go +++ b/model/parsers/glm46.go @@ -345,44 +345,163 @@ func escapeGLM46Content(s string) string { return result.String() } -// repairUnclosedArgValues inserts missing closing tags. -// GLM models sometimes omit the closing tag, producing XML like: +// repairPhase represents the expected next tag in the repair cycle. +type repairPhase int + +const ( + phaseArgKeyOpen repairPhase = iota // expecting + phaseArgKeyClose // expecting + phaseArgValOpen // expecting + phaseArgValClose // expecting + phaseCount // number of phases +) + +// repairGLM46XML reconstructs well-formed XML from GLM model output that may +// have missing or mismatched tags. The expected structure is: // -// value +// func_name +// key +// value +// ... // -// instead of: -// -// value -func repairUnclosedArgValues(s string) string { +// GLM models frequently omit opening or closing tags. This function follows +// the expected tag cycle, scanning forward for each expected tag in sequence. +// When a tag is missing, it inserts the tag and consumes any text in between. +func repairGLM46XML(s string) string { + // tagCycle is the repeating sequence of tags after the function name. + tagCycle := [phaseCount]string{"", "", "", ""} + + // findNextTag returns the index and identity of the earliest known tag in s. + findNextTag := func(s string) (int, string) { + bestIdx := -1 + bestTag := "" + for _, tag := range tagCycle { + if idx := strings.Index(s, tag); idx != -1 && (bestIdx == -1 || idx < bestIdx) { + bestIdx = idx + bestTag = tag + } + } + return bestIdx, bestTag + } + + // tagIndex returns the phase corresponding to the given tag. + tagIndex := func(tag string) repairPhase { + for i, t := range tagCycle { + if t == tag { + return repairPhase(i) + } + } + return -1 + } + var result strings.Builder - for { - openIdx := strings.Index(s, "") - if openIdx == -1 { + + idx, firstTag := findNextTag(s) + if idx == -1 { + return s + } + prefix := s[:idx] + s = s[idx:] + + // If the first tag is not , the text before it may contain both + // the function name and key content (e.g. "weather city"). + // Function names cannot contain space, so split at the first space. + phase := phaseArgKeyOpen + if firstTag != "" { + if spIdx := strings.IndexFunc(prefix, unicode.IsSpace); spIdx != -1 { + result.WriteString(prefix[:spIdx]) + keyContent := strings.TrimLeftFunc(prefix[spIdx:], unicode.IsSpace) + result.WriteString("") + result.WriteString(keyContent) + phase = phaseArgKeyClose + } else { + result.WriteString(prefix) + } + } else { + result.WriteString(prefix) + } + + // Walk through the expected tag cycle. At each step, look for the + // expected tag. If a different tag appears first, emit the missing + // tags to catch up, then continue. + for len(s) > 0 { + idx, found := findNextTag(s) + expected := tagCycle[phase] + isOpen := phase%2 == 0 // even phases are opening tags + + if idx == -1 { + // No more tags — emit remaining text with fixups + if isOpen { + // Expecting an opening tag but nothing left — we're done + break + } + // Expecting a closing tag — emit text then close result.WriteString(s) + result.WriteString(expected) + phase = (phase + 1) % phaseCount break } - afterOpen := openIdx + len("") - closeIdx := strings.Index(s[afterOpen:], "") - nextKeyIdx := strings.Index(s[afterOpen:], "") - // Check if properly closed before the next (or no next key) - if closeIdx != -1 && (nextKeyIdx == -1 || closeIdx < nextKeyIdx) { - end := afterOpen + closeIdx + len("") - result.WriteString(s[:end]) - s = s[end:] + + if found == expected { + // Found the expected tag — emit any text before it, then the tag + result.WriteString(s[:idx]) + result.WriteString(expected) + s = s[idx+len(expected):] + phase = (phase + 1) % phaseCount continue } - // Unclosed — insert before the next or at end - if nextKeyIdx != -1 { - insertAt := afterOpen + nextKeyIdx - result.WriteString(s[:insertAt]) - result.WriteString("") - s = s[insertAt:] - } else { - result.WriteString(s) - result.WriteString("") - break + + // Found a different tag. Insert missing tags to catch up. + foundIdx := tagIndex(found) + + if isOpen && idx > 0 { + // Text before the found tag while expecting an opening tag — + // the opening tag was omitted. Emit it before the text. + result.WriteString(expected) + // Advance to the next phase (text content) and then look + // for the closing tag — but the found tag might be that + // closing tag or something further ahead. Emit text up to + // the found tag and insert any missing tags between. + result.WriteString(s[:idx]) + phase = (phase + 1) % phaseCount // now expecting closing + s = s[idx:] + // Fall through to re-evaluate with the closing tag expected + continue } + + // Emit missing tags to advance from current phase to the found tag's phase + for phase != foundIdx { + tag := tagCycle[phase] + if phase%2 == 0 { + result.WriteString(tag) + } else { + // Closing tag — emit any text before the found tag first, + // but only if we're one step before the found tag + if (phase+1)%phaseCount == foundIdx && idx > 0 { + result.WriteString(s[:idx]) + s = s[idx:] + idx = 0 + } + result.WriteString(tag) + } + phase = (phase + 1) % phaseCount + } + // Now phase == foundIdx, re-process without advancing s } + + // If we stopped mid-pair (after an opening tag), close it + switch phase { + case phaseArgKeyClose: // after , expecting text/ + result.WriteString("") + result.WriteString("") + result.WriteString("") + case phaseArgValOpen: // after , expecting + result.WriteString("") + result.WriteString("") + case phaseArgValClose: // after , expecting text/ + result.WriteString("") + } + return result.String() } @@ -398,7 +517,7 @@ func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCa var parsed GLMToolCallXML if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil { parsed = GLMToolCallXML{} - repaired := "" + repairUnclosedArgValues(escaped) + "" + repaired := "" + repairGLM46XML(escaped) + "" if err2 := xml.Unmarshal([]byte(repaired), &parsed); err2 != nil { return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err) } diff --git a/model/parsers/glm46_test.go b/model/parsers/glm46_test.go index 8cd88d196..fa6e665ea 100644 --- a/model/parsers/glm46_test.go +++ b/model/parsers/glm46_test.go @@ -887,6 +887,28 @@ line3`, }, }, }, + { + name: "unopened arg_value after arg_key", + tools: []api.Tool{}, + rawToolCall: "get-weather\ncity\nNew York\nunit\ncelsius", + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "get-weather", + Arguments: args(`{"city": "New York", "unit": "celsius"}`), + }, + }, + }, + { + name: "mixed unopened and valid arg_values", + tools: []api.Tool{}, + rawToolCall: "get-weather\ncity\nParis\nunit\ncelsius", + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "get-weather", + Arguments: args(`{"city": "Paris", "unit": "celsius"}`), + }, + }, + }, } for i, tc := range cases { @@ -902,7 +924,7 @@ line3`, } } -func TestRepairUnclosedArgValues(t *testing.T) { +func TestRepairGLM46XML(t *testing.T) { cases := []struct { name string input string @@ -910,33 +932,63 @@ func TestRepairUnclosedArgValues(t *testing.T) { }{ { name: "already valid", - input: `kv`, - want: `kv`, + input: `funckv`, + want: `funckv`, }, { - name: "unclosed at end", - input: `kv`, - want: `kv`, + name: "missing at end", + input: `funckv`, + want: `funckv`, }, { - name: "unclosed before next arg_key", - input: `a1b2`, - want: `a1b2`, + name: "missing before next arg_key", + input: `funca1b2`, + want: `funca1b2`, }, { - name: "no arg_value tags", + name: "no tags at all", input: `just plain text`, want: `just plain text`, }, { - name: "multiple unclosed", - input: `a1b2`, - want: `a1b2`, + name: "missing open tag", + input: `funckv`, + want: `funckv`, + }, + { + name: "missing close tag", + input: `funckv`, + want: `funckv`, + }, + { + name: "missing open tag", + input: `func kv`, + want: `funckv`, + }, + { + name: "all closing tags missing", + input: `funckv`, + want: `funckv`, + }, + { + name: "all opening tags missing", + input: "func kv", + want: "funckv", + }, + { + name: "multiple pairs with mixed missing tags", + input: `funca1b2`, + want: `funca1b2`, + }, + { + name: "newlines preserved", + input: "func\ncity\nNew York", + want: "func\ncity\nNew York", }, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - got := repairUnclosedArgValues(tc.input) + got := repairGLM46XML(tc.input) if got != tc.want { t.Errorf("got %q, want %q", got, tc.want) }