Skip to content

Commit

Permalink
Show warning when using a quantized base model
Browse files — browse the repository at this point in the history
  • Loading branch information
slaren committed Apr 15, 2023
1 parent ecd4827 commit 061f1ce
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions llama.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1842,9 +1842,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, false));
}

fprintf(stderr, "%s: ", __func__);

// read tensors and apply
bool warned = false;
int n_tensors = 0;
while (true) {
int32_t n_dims;
Expand Down Expand Up @@ -1937,6 +1936,14 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
base_t = dest_t;
}

if (base_t->type == GGML_TYPE_Q4_0 || base_t->type == GGML_TYPE_Q4_1) {
if (!warned) {
fprintf(stderr, "%s: warning: using a lora adapter with a quantized model may result in poor quality, "
"use a f16 or f32 base model with --lora-base\n", __func__);
warned = true;
}
}

ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];

Expand Down Expand Up @@ -1973,7 +1980,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
lora_tensors.clear();

n_tensors++;
if (n_tensors % 8 == 0)
if (n_tensors % 4 == 0)
fprintf(stderr, ".");
}
}
Expand Down

0 comments on commit 061f1ce

Please sign in to comment.