fix(llama.cpp-ggml): fixup max_tokens for old backend (mudler#2094)
fix(llama.cpp-ggml): set 0 as default for `max_tokens`

Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler authored Apr 21, 2024
1 parent 284ad02 commit 180cd4c
Showing 1 changed file with 2 additions and 2 deletions.
core/config/backend_config.go: 2 additions & 2 deletions
@@ -210,7 +210,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	defaultMirostatETA := 0.1
 	defaultTypicalP := 1.0
 	defaultTFZ := 1.0
-	defaultInfinity := -1
+	defaultZero := 0
 
 	// Try to offload all GPU layers (if GPU is found)
 	defaultHigh := 99999999
@@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	}
 
 	if cfg.Maxtokens == nil {
-		cfg.Maxtokens = &defaultInfinity
+		cfg.Maxtokens = &defaultZero
 	}
 
 	if cfg.Mirostat == nil {
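For context, the idiom at work in this diff: `Maxtokens` is a pointer field, so `nil` distinguishes "the user never set `max_tokens`" from an explicit value, and `SetDefaults` fills in a default only when the field is nil. Below is a minimal, self-contained sketch of that pattern. It is illustrative, not LocalAI's actual code: the struct is trimmed to a single field and the `ConfigLoaderOption` parameter is omitted.

// Minimal sketch of the nil-pointer default idiom used by SetDefaults.
// Illustrative only: the struct is reduced to one field and the
// ConfigLoaderOption variadic parameter from the real signature is dropped.
package main

import "fmt"

type BackendConfig struct {
	Maxtokens *int // nil means "not set by the user"
}

func (cfg *BackendConfig) SetDefaults() {
	defaultZero := 0
	if cfg.Maxtokens == nil {
		// Apply the default only when the field was left unset;
		// an explicit user value (even 0) is never overwritten.
		cfg.Maxtokens = &defaultZero
	}
}

func main() {
	unset := BackendConfig{}
	unset.SetDefaults()
	fmt.Println(*unset.Maxtokens) // 0: default applied

	five := 5
	explicit := BackendConfig{Maxtokens: &five}
	explicit.SetDefaults()
	fmt.Println(*explicit.Maxtokens) // 5: user value preserved
}

The fix itself does not touch this idiom; it only changes which sentinel unset configs receive, replacing the `defaultInfinity` value of -1 (which, per the commit title, the older llama.cpp-ggml backend mishandled) with `defaultZero`.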
