mirror of
https://github.com/ollama/ollama.git
synced 2026-03-27 02:58:43 +07:00
server: reduce download concurrency for HuggingFace URLs
Reduces concurrent download parts from 16 to 4 for HuggingFace URLs to avoid triggering rate limits (HTTP 429 errors). Adds OLLAMA_HF_CONCURRENCY environment variable for users who want to customize the concurrency level. Fixes #13297
This commit is contained in:
@@ -96,10 +96,36 @@ func (p *blobDownloadPart) UnmarshalJSON(b []byte) error {
|
||||
|
||||
const (
|
||||
numDownloadParts = 16
|
||||
numHFDownloadParts = 4
|
||||
minDownloadPartSize int64 = 100 * format.MegaByte
|
||||
maxDownloadPartSize int64 = 1000 * format.MegaByte
|
||||
)
|
||||
|
||||
// isHuggingFaceURL returns true if the URL is from HuggingFace domains
|
||||
func isHuggingFaceURL(u *url.URL) bool {
|
||||
if u == nil {
|
||||
return false
|
||||
}
|
||||
host := strings.ToLower(u.Hostname())
|
||||
return strings.HasSuffix(host, "huggingface.co") ||
|
||||
strings.HasSuffix(host, ".hf.co") ||
|
||||
host == "hf.co"
|
||||
}
|
||||
|
||||
// getNumDownloadParts returns the number of concurrent download parts to use
|
||||
// for the given URL. HuggingFace URLs use reduced concurrency to avoid rate limiting.
|
||||
func getNumDownloadParts(u *url.URL) int {
|
||||
if isHuggingFaceURL(u) {
|
||||
if v := os.Getenv("OLLAMA_HF_CONCURRENCY"); v != "" {
|
||||
if n, err := strconv.Atoi(v); err == nil && n > 0 {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return numHFDownloadParts
|
||||
}
|
||||
return numDownloadParts
|
||||
}
|
||||
|
||||
func (p *blobDownloadPart) Name() string {
|
||||
return strings.Join([]string{
|
||||
p.blobDownload.Name, "partial", strconv.Itoa(p.N),
|
||||
@@ -271,7 +297,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
}
|
||||
|
||||
g, inner := errgroup.WithContext(ctx)
|
||||
g.SetLimit(numDownloadParts)
|
||||
concurrency := getNumDownloadParts(directURL)
|
||||
if concurrency != numDownloadParts {
|
||||
slog.Info(fmt.Sprintf("using reduced concurrency (%d) for HuggingFace download", concurrency))
|
||||
}
|
||||
g.SetLimit(concurrency)
|
||||
for i := range b.Parts {
|
||||
part := b.Parts[i]
|
||||
if part.Completed.Load() == part.Size {
|
||||
|
||||
Reference in New Issue
Block a user