From 3c06dd48661deb36dd36861848ea134389017a36 Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Sun, 1 Oct 2023 17:49:22 -0400
Subject: [PATCH 1/4] gguf : do not store defaults in class vars

Making an assignment in a class outside of a method does not set a
default value; it actually sets the attribute on the class itself.
Instances of the class inherit these, but it is incorrect to expose
these fields here.
---
 gguf-py/gguf/gguf.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 598cf8e594aa8a..6f30bb5c3faf89 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -464,24 +464,24 @@ def get_type(val):
 
 class GGUFWriter:
     fout: BufferedWriter
-    arch: str
-    offset_tensor = 0
-    data_alignment = GGUF_DEFAULT_ALIGNMENT
-    kv_data = b""
-    kv_data_count = 0
-    ti_data = b""
-    ti_data_count = 0
-    use_temp_file: bool
-    temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
+    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
     tensors: list[tuple[np.ndarray[Any, Any], int]]
 
     def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
         self.fout = open(path, "wb")
         self.arch = arch
-        self.add_architecture()
+        self.offset_tensor = 0
+        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
+        self.kv_data = b""
+        self.kv_data_count = 0
+        self.ti_data = b""
+        self.ti_data_count = 0
         self.use_temp_file = use_temp_file
+        self.temp_file = None
         self.tensors = []
+        self.add_architecture()
+
     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
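The pitfall described above is easy to demonstrate. Here is a minimal,
self-contained sketch of it; the Writer class and its fields are
illustrative stand-ins, not part of gguf.py:

    class Writer:
        # Assignments at class scope create attributes on the class object
        # itself, not per-instance defaults.
        kv_data_count = 0
        tensors = []          # one list object, shared by every instance!

        def add_tensor(self, name):
            self.tensors.append(name)   # mutates the shared class-level list

    a = Writer()
    b = Writer()
    a.add_tensor("tok_embd.weight")
    print(b.tensors)                  # ['tok_embd.weight'] -- b sees a's data
    a.kv_data_count += 1              # only now does a get its own attribute
    print(Writer.kv_data_count, b.kv_data_count)   # still 0 0

GGUFWriter's old class-level values were immutable ints and bytes, so
reads simply fell through to the class until the first assignment rather
than leaking between instances, but the mutable case shows why stashing
per-writer state on the class is a trap; moving every default into
__init__ removes it.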
From: Cebtenzzre
Date: Sun, 1 Oct 2023 18:07:43 -0400
Subject: [PATCH 2/4] gguf : force use_temp_file to false

---
 convert-llama-ggml-to-gguf.py                  |  5 +----
 .../convert-finetune-checkpoint-to-gguf.py     |  2 +-
 .../convert-train-checkpoint-to-gguf.py        |  2 +-
 gguf-py/gguf/gguf.py                           | 38 ++++++--------------------------------
 4 files changed, 9 insertions(+), 38 deletions(-)

diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py
index b5d3e0b3c3acea..847ea7c69ac9d6 100755
--- a/convert-llama-ggml-to-gguf.py
+++ b/convert-llama-ggml-to-gguf.py
@@ -235,10 +235,7 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override
 
     def save(self):
         print('* Preparing to save GGUF file')
-        gguf_writer = gguf.GGUFWriter(
-            self.cfg.output,
-            gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
-            use_temp_file = False )
+        gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
         self.add_params(gguf_writer)
         self.add_vocab(gguf_writer)
         if self.special_vocab is not None:
diff --git a/examples/finetune/convert-finetune-checkpoint-to-gguf.py b/examples/finetune/convert-finetune-checkpoint-to-gguf.py
index 96d6633ed7d5ee..56d1d9ac06d500 100644
--- a/examples/finetune/convert-finetune-checkpoint-to-gguf.py
+++ b/examples/finetune/convert-finetune-checkpoint-to-gguf.py
@@ -475,7 +475,7 @@ def main():
     # we should have read all available data
     assert(offset == len(data))
 
-    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
     chk.save_gguf(gguf_writer)
     print("    gguf: write header")
     gguf_writer.write_header_to_file()
diff --git a/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py b/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
index 351e7bc2d2a95f..07a341e98c78ce 100644
--- a/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
+++ b/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
@@ -485,7 +485,7 @@ def main():
     # we should have read all available data
     assert(offset == len(data))
 
-    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
     chk.save_gguf(gguf_writer)
     print("    gguf: write header")
     gguf_writer.write_header_to_file()
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 6f30bb5c3faf89..2760ce79f01f15 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -464,10 +464,9 @@ def get_type(val):
 
 class GGUFWriter:
     fout: BufferedWriter
-    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
-    tensors: list[tuple[np.ndarray[Any, Any], int]]
+    tensors: list[np.ndarray[Any, Any]]
 
-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
+    def __init__(self, path: os.PathLike[str] | str, arch: str):
         self.fout = open(path, "wb")
         self.arch = arch
         self.offset_tensor = 0
@@ -476,8 +475,6 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
         self.kv_data_count = 0
         self.ti_data = b""
         self.ti_data_count = 0
-        self.use_temp_file = use_temp_file
-        self.temp_file = None
         self.tensors = []
         self.add_architecture()
 
@@ -621,24 +618,10 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         self.ti_data_count += 1
 
     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
-        if self.use_temp_file and self.temp_file is None:
-            fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
-            fp.seek(0)
-            self.temp_file = fp
-
         shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
         self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
 
-        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
-
-        if self.temp_file is None:
-            self.tensors.append((tensor, pad))
-            return
-
-        tensor.tofile(self.temp_file)
-
-        if pad != 0:
-            self.temp_file.write(bytes([0] * pad))
+        self.tensors.append(tensor)
 
     def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
         pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
@@ -655,18 +638,9 @@ def write_tensors_to_file(self):
 
         self.write_padding(self.fout, self.fout.tell())
 
-        if self.temp_file is None:
-            for (currtensor, currpad) in self.tensors:
-                currtensor.tofile(self.fout)
-                if currpad != 0:
-                    self.fout.write(bytes([0] * currpad))
-            return
-
-        self.temp_file.seek(0)
-
-        shutil.copyfileobj(self.temp_file, self.fout)
-        self.flush()
-        self.temp_file.close()
+        for tensor in self.tensors:
+            tensor.tofile(self.fout)
+            self.write_padding(self.fout, tensor.nbytes)
 
     def flush(self):
         self.fout.flush()
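With the temp-file staging gone, add_tensor just records a reference and
write_tensors_to_file pads each tensor on the way out. As a quick sanity
check of the alignment arithmetic used by write_padding and the removed
pad computation above, here is a standalone version; it assumes ggml_pad
is the usual round-up-to-a-multiple helper and uses the GGUF default
alignment of 32:

    def ggml_pad(x: int, n: int) -> int:
        # round x up to the next multiple of n
        return ((x + n - 1) // n) * n

    data_alignment = 32                 # GGUF_DEFAULT_ALIGNMENT
    nbytes = 100                        # e.g. a 25-element float32 tensor
    print(ggml_pad(nbytes, data_alignment))            # 128
    print(ggml_pad(nbytes, data_alignment) - nbytes)   # 28 zero bytes of padding

The cost of dropping SpooledTemporaryFile is that every array handed to
add_tensor now stays resident in memory until write_tensors_to_file runs;
patch 4 below mitigates that by releasing each tensor as soon as it has
been written.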
From 199724f00424449e8b4076d0b03c2d7717c056cf Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Sun, 1 Oct 2023 19:31:08 -0400
Subject: [PATCH 3/4] gguf : track writer state

---
 gguf-py/gguf/gguf.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 2760ce79f01f15..264ae264bb4cce 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -462,6 +462,13 @@ def get_type(val):
     sys.exit()
 
 
+class WriterState:
+    EMPTY   = auto()
+    HEADER  = auto()
+    KV_DATA = auto()
+    TI_DATA = auto()
+
+
 class GGUFWriter:
     fout: BufferedWriter
     tensors: list[np.ndarray[Any, Any]]
@@ -476,24 +483,37 @@ def __init__(self, path: os.PathLike[str] | str, arch: str):
         self.ti_data = b""
         self.ti_data_count = 0
         self.tensors = []
+        self.state = WriterState.EMPTY
         self.add_architecture()
 
     def write_header_to_file(self):
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
         self.fout.write(struct.pack("<I", GGUF_VERSION))
         self.fout.write(struct.pack("<Q", self.ti_data_count))
         self.fout.write(struct.pack("<Q", self.kv_data_count))
         self.flush()
+        self.state = WriterState.HEADER
 
     def write_kv_data_to_file(self):
+        if self.state is not WriterState.HEADER:
+            raise ValueError(f'Expected output file to contain the header, got {self.state}')
+
         self.fout.write(self.kv_data)
         self.flush()
+        self.state = WriterState.KV_DATA
 
     def write_ti_data_to_file(self):
+        if self.state is not WriterState.KV_DATA:
+            raise ValueError(f'Expected output file to contain KV data, got {self.state}')
+
         self.fout.write(self.ti_data)
         self.flush()
+        self.state = WriterState.TI_DATA

From: Cebtenzzre
Date: Sun, 1 Oct 2023 21:42:42 -0400
Subject: [PATCH 4/4] gguf : free tensors as they are written

---
 gguf-py/gguf/gguf.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 264ae264bb4cce..ce015cc8694043 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -661,7 +661,11 @@ def write_tensors_to_file(self):
 
         self.write_padding(self.fout, self.fout.tell())
 
-        for tensor in self.tensors:
+        while True:
+            try:
+                tensor = self.tensors.pop(0)
+            except IndexError:
+                break
             tensor.tofile(self.fout)
             self.write_padding(self.fout, tensor.nbytes)
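A final note on the last patch: the while/pop loop looks roundabout
compared with the old for loop, but the difference is what stays
reachable. Iterating keeps every array referenced by self.tensors until
the method returns, while pop(0) makes the list give up its reference
before the write, so each buffer becomes collectable as soon as the loop
moves on. A standalone sketch of the same idea, with a made-up file name
and tensor sizes:

    import numpy as np

    tensors = [np.zeros((1024, 1024), dtype=np.float32) for _ in range(4)]

    with open("demo.bin", "wb") as fout:
        while True:
            try:
                tensor = tensors.pop(0)   # the list drops its reference here
            except IndexError:
                break
            tensor.tofile(fout)
            # 'tensor' is rebound on the next iteration (or dropped when the
            # enclosing scope ends), so each 4 MiB buffer can be reclaimed
            # instead of all four surviving until the loop finishes.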