diff --git a/cmd/gguf-parser/README.md b/cmd/gguf-parser/README.md
index 3a20956..b490ed5 100644
--- a/cmd/gguf-parser/README.md
+++ b/cmd/gguf-parser/README.md
@@ -40,7 +40,7 @@ GLOBAL OPTIONS:
    --batch-size value, -b value                                        Specify the logical batch size, which is used to estimate the usage. (default: 2048)
    --cache-type-k value, --ctk value                                   Specify the type of Key cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
    --cache-type-v value, --ctv value                                   Specify the type of Value cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
-   --ctx-size value, -c value                                          Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: -1)
+   --ctx-size value, -c value                                          Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: 0)
    --gpu-layers value, --ngl value, --n-gpu-layers value               Specify how many layers of the main model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)
    --gpu-layers-draft value, --ngld value, --n-gpu-layers-draft value  Specify how many layers of the draft model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)
    --gpu-layers-step value                                             Specify the step of layers to offload, works with "--gpu-layers". (default: 0)
diff --git a/cmd/gguf-parser/main.go b/cmd/gguf-parser/main.go
index f2dfbbd..8304c5c 100644
--- a/cmd/gguf-parser/main.go
+++ b/cmd/gguf-parser/main.go
@@ -949,7 +949,7 @@ var (
    deviceMetrics     cli.StringSlice
    platformFootprint = "150,250"
    // estimate options for llama.cpp
-   lmcCtxSize           = -1
+   lmcCtxSize           = 0
    lmcInMaxCtxSize      bool
    lmcLogicalBatchSize  = 2048
    lmcPhysicalBatchSize = 512
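
The diff only moves the default sentinel for `--ctx-size` from -1 to 0; per the README text, an unset context size still resolves to the model's maximum context size at estimate time. Below is a minimal Go sketch of that fallback rule. It is an illustration only: `resolveCtxSize`, its parameters, and the assumption that any non-positive value counts as "unset" (so the old -1 and new 0 sentinels behave the same) are hypothetical and not taken from the gguf-parser codebase.

```go
package main

import "fmt"

// resolveCtxSize is a hypothetical helper (not part of this diff) showing the
// documented behavior: when --ctx-size is left at its default, the usage
// estimate falls back to the model's maximum context size. Treating any
// non-positive value as "unset" keeps the old (-1) and new (0) sentinels
// equivalent.
func resolveCtxSize(ctxSize, modelMaxCtx int) int {
	if ctxSize <= 0 {
		return modelMaxCtx // default: use the model's maximum context size
	}
	return ctxSize // an explicit user-provided value wins
}

func main() {
	fmt.Println(resolveCtxSize(0, 32768))    // 32768 (new default sentinel)
	fmt.Println(resolveCtxSize(-1, 32768))   // 32768 (old default sentinel)
	fmt.Println(resolveCtxSize(4096, 32768)) // 4096
}
```

Under this reading, the change is cosmetic for users who never pass `--ctx-size`, while 0 is arguably the more conventional zero-value default for a Go flag.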