diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 1dc18b2a577218..33a223b8f406c6 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -239,10 +239,7 @@ def write_tensors(self):
                 data: np.ndarray = data  # type hint
                 n_dims = len(data.shape)
                 data_dtype = data.dtype
-
-                # if f32 desired, convert any float16 to float32
-                if self.ftype == 0 and data_dtype == np.float16:
-                    data = data.astype(np.float32)
+                data_qtype: gguf.GGMLQuantizationType | None = None

                 # when both are True, f32 should win
                 extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
@@ -254,20 +251,27 @@ def write_tensors(self):
                 # if f16 desired, convert any float32 2-dim weight tensors to float16
                 extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)

-                # when both extra_f32 and extra_f16 are False, convert to float32 by default
-                if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
-                    data = data.astype(np.float32)
+                if self.ftype != gguf.GGMLFileType.ALL_F32 and extra_f16 and not extra_f32:
+                    if self.ftype == gguf.GGMLFileType.MOSTLY_F16:
+                        if data_dtype != np.float16:
+                            data = data.astype(np.float16)
+                        data_qtype = gguf.GGMLQuantizationType.F16
+                    # TODO: add more types (like BF16) here
+
+                else:  # by default, convert to float32
+                    if data_dtype != np.float32:
+                        data = data.astype(np.float32)
+                    data_qtype = gguf.GGMLQuantizationType.F32

-                if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
-                    data = data.astype(np.float16)
+                assert data_qtype is not None

                 # reverse shape to make it similar to the internal ggml dimension order
                 shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"

                 # n_dims is implicit in the shape
-                logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data.dtype}, shape = {shape_str}")
+                logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

-                self.gguf_writer.add_tensor(new_name, data)
+                self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)

     def write(self):
         self.write_tensors()
@@ -2472,9 +2476,9 @@ def main() -> None:
         logger.error(f'Error: {args.model} is not a directory')
         sys.exit(1)

-    ftype_map = {
-        "f32": gguf.GGMLQuantizationType.F32,
-        "f16": gguf.GGMLQuantizationType.F16,
+    ftype_map: dict[str, gguf.GGMLFileType] = {
+        "f32": gguf.GGMLFileType.ALL_F32,
+        "f16": gguf.GGMLFileType.MOSTLY_F16,
     }

     if args.outfile is not None:
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 5951c0bb0fb5ea..9675a8560e8b37 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -820,6 +820,42 @@ class GGMLQuantizationType(IntEnum):
     BF16    = 30


+class GGMLFileType(IntEnum):
+    ALL_F32              = 0
+    MOSTLY_F16           = 1   # except 1d tensors
+    MOSTLY_Q4_0          = 2   # except 1d tensors
+    MOSTLY_Q4_1          = 3   # except 1d tensors
+    MOSTLY_Q4_1_SOME_F16 = 4   # tok_embeddings.weight and output.weight are F16
+    # MOSTLY_Q4_2        = 5   # support has been removed
+    # MOSTLY_Q4_3        = 6   # support has been removed
+    MOSTLY_Q8_0          = 7   # except 1d tensors
+    MOSTLY_Q5_0          = 8   # except 1d tensors
+    MOSTLY_Q5_1          = 9   # except 1d tensors
+    MOSTLY_Q2_K          = 10  # except 1d tensors
+    MOSTLY_Q3_K_S        = 11  # except 1d tensors
+    MOSTLY_Q3_K_M        = 12  # except 1d tensors
+    MOSTLY_Q3_K_L        = 13  # except 1d tensors
+    MOSTLY_Q4_K_S        = 14  # except 1d tensors
+    MOSTLY_Q4_K_M        = 15  # except 1d tensors
+    MOSTLY_Q5_K_S        = 16  # except 1d tensors
+    MOSTLY_Q5_K_M        = 17  # except 1d tensors
+    MOSTLY_Q6_K          = 18  # except 1d tensors
+    MOSTLY_IQ2_XXS       = 19  # except 1d tensors
+    MOSTLY_IQ2_XS        = 20  # except 1d tensors
+    MOSTLY_Q2_K_S        = 21  # except 1d tensors
+    MOSTLY_IQ3_XS        = 22  # except 1d tensors
+    MOSTLY_IQ3_XXS       = 23  # except 1d tensors
+    MOSTLY_IQ1_S         = 24  # except 1d tensors
+    MOSTLY_IQ4_NL        = 25  # except 1d tensors
+    MOSTLY_IQ3_S         = 26  # except 1d tensors
+    MOSTLY_IQ3_M         = 27  # except 1d tensors
+    MOSTLY_IQ2_S         = 28  # except 1d tensors
+    MOSTLY_IQ2_M         = 29  # except 1d tensors
+    MOSTLY_IQ4_XS        = 30  # except 1d tensors
+    MOSTLY_IQ1_M         = 31  # except 1d tensors
+    MOSTLY_BF16          = 32  # except 1d tensors
+
+
 class GGUFEndian(IntEnum):
     LITTLE = 0
     BIG    = 1
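For reference, the per-tensor type selection introduced in the first two hunks can be read in isolation as the sketch below. It is illustrative only, not part of the patch: the helper name choose_output_type is hypothetical, and it assumes a gguf-py that already contains the GGMLFileType enum added above. It mirrors the patched logic, including the assert that trips for file types the converter does not handle yet.

    # Sketch of the dtype/qtype decision made per tensor in the patched
    # write_tensors(); choose_output_type is a hypothetical standalone helper.
    import numpy as np
    import gguf  # assumes a gguf-py with GGMLFileType (i.e. this patch applied)

    def choose_output_type(ftype, data, extra_f32, extra_f16):
        data_qtype = None
        if ftype != gguf.GGMLFileType.ALL_F32 and extra_f16 and not extra_f32:
            if ftype == gguf.GGMLFileType.MOSTLY_F16:
                if data.dtype != np.float16:
                    data = data.astype(np.float16)
                data_qtype = gguf.GGMLQuantizationType.F16
            # other file types (e.g. BF16) are not handled yet, matching the TODO
        else:  # by default, convert to float32
            if data.dtype != np.float32:
                data = data.astype(np.float32)
            data_qtype = gguf.GGMLQuantizationType.F32
        assert data_qtype is not None, "unhandled file type"
        return data, data_qtype

    # Example: a 2-D weight (extra_f16=True) becomes F16 under MOSTLY_F16,
    # while a 1-D norm weight (extra_f32=True) stays F32.
    w = np.ones((8, 8), dtype=np.float32)
    _, qtype = choose_output_type(gguf.GGMLFileType.MOSTLY_F16, w, False, True)
    print(qtype.name)  # F16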