runner: add token history sampling parameters to ollama runner (#14537)

This commit is contained in:
Jeffrey Morgan
2026-03-01 19:16:07 -08:00
committed by GitHub
parent 3490e9590b
commit 86513cb697
8 changed files with 193 additions and 15 deletions

View File

@@ -25,6 +25,48 @@ func (h *tokenHeap) Pop() any {
return x
}
func tokenCounts(history []int32, vocabSize int) map[int32]int {
if len(history) == 0 {
return nil
}
start := 0
if len(history) > DefaultPenaltyLookback {
start = len(history) - DefaultPenaltyLookback
}
counts := make(map[int32]int, len(history)-start)
for _, token := range history[start:] {
if token < 0 || int(token) >= vocabSize {
continue
}
counts[token]++
}
return counts
}
func applyPenalty(logit float32, count int, repeatPenalty float32, presencePenalty float32, frequencyPenalty float32) float32 {
if repeatPenalty != 1.0 {
// Preserve ordering for negative logits when applying repeat penalty.
if logit < 0 {
logit *= repeatPenalty
} else {
logit /= repeatPenalty
}
}
if frequencyPenalty != 0 {
logit -= float32(count) * frequencyPenalty
}
if presencePenalty != 0 {
logit -= presencePenalty
}
return logit
}
// temperature applies scaling to the logits
func temperature(ts []token, temp float32) {
// Ensure temperature clipping near 0 to avoid numerical instability