refactor: cmd ctx default value
Signed-off-by: thxCode <[email protected]>
thxCode committed Jan 7, 2025
1 parent 6b2fca7 commit b6b5d91
Showing 2 changed files with 2 additions and 2 deletions.
cmd/gguf-parser/README.md (2 changes: 1 addition, 1 deletion)

@@ -40,7 +40,7 @@ GLOBAL OPTIONS:
 --batch-size value, -b value Specify the logical batch size, which is used to estimate the usage. (default: 2048)
 --cache-type-k value, --ctk value Specify the type of Key cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
 --cache-type-v value, --ctv value Specify the type of Value cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
---ctx-size value, -c value Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: -1)
+--ctx-size value, -c value Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: 0)
 --gpu-layers value, --ngl value, --n-gpu-layers value Specify how many layers of the main model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)
 --gpu-layers-draft value, --ngld value, --n-gpu-layers-draft value Specify how many layers of the draft model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)
 --gpu-layers-step value Specify the step of layers to offload, works with "--gpu-layers". (default: 0)
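With the new default, leaving --ctx-size unset estimates usage at the model's maximum context size, while passing -c still overrides it. A hypothetical invocation for illustration (the --path flag for loading a local model is assumed here, not confirmed by this diff):

gguf-parser --path model.gguf           # --ctx-size defaults to 0, resolved to the model's maximum context size
gguf-parser --path model.gguf -c 4096   # estimate at an explicit 4096-token context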
cmd/gguf-parser/main.go (2 changes: 1 addition, 1 deletion)

@@ -949,7 +949,7 @@ var (
 	deviceMetrics     cli.StringSlice
 	platformFootprint = "150,250"
 	// estimate options for llama.cpp
-	lmcCtxSize          = -1
+	lmcCtxSize          = 0
 	lmcInMaxCtxSize     bool
 	lmcLogicalBatchSize = 2048
 	lmcPhysicalBatchSize = 512
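For context, here is a minimal Go sketch of how such a sentinel default could be resolved at run time. The helper resolveCtxSize and the assumption that any non-positive value falls back to the model's maximum context size are illustrative, not gguf-parser's actual code:

package main

import "fmt"

// resolveCtxSize illustrates the sentinel semantics implied by the diff:
// a non-positive --ctx-size (the new default 0, or the old -1) falls back
// to the model's maximum context size, so the estimation behavior stays
// the same; only the advertised default value changes.
func resolveCtxSize(flagValue, modelMaxCtx int) int {
	if flagValue <= 0 {
		return modelMaxCtx
	}
	return flagValue
}

func main() {
	const modelMaxCtx = 8192 // assumed maximum context size of some model
	fmt.Println(resolveCtxSize(0, modelMaxCtx))    // 8192 (new default, 0)
	fmt.Println(resolveCtxSize(-1, modelMaxCtx))   // 8192 (old default, -1)
	fmt.Println(resolveCtxSize(4096, modelMaxCtx)) // 4096 (explicit -c 4096)
}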
