From 3c06dd48661deb36dd36861848ea134389017a36 Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Sun, 1 Oct 2023 17:49:22 -0400
Subject: [PATCH 1/4] gguf : do not store defaults in class vars

Making an assignment in a class outside of a method does not set a
default value; it actually sets the attribute on the class itself.
Instances of the class inherit these, but it is incorrect to expose
these fields here.
---
 gguf-py/gguf/gguf.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 598cf8e594aa8a..6f30bb5c3faf89 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -464,24 +464,24 @@ def get_type(val):
 
 class GGUFWriter:
     fout: BufferedWriter
-    arch: str
-    offset_tensor = 0
-    data_alignment = GGUF_DEFAULT_ALIGNMENT
-    kv_data = b""
-    kv_data_count = 0
-    ti_data = b""
-    ti_data_count = 0
-    use_temp_file: bool
-    temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
+    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
     tensors: list[tuple[np.ndarray[Any, Any], int]]
 
     def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
         self.fout = open(path, "wb")
         self.arch = arch
-        self.add_architecture()
+        self.offset_tensor = 0
+        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
+        self.kv_data = b""
+        self.kv_data_count = 0
+        self.ti_data = b""
+        self.ti_data_count = 0
         self.use_temp_file = use_temp_file
+        self.temp_file = None
         self.tensors = []
+        self.add_architecture()
+
     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
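The pitfall described above is easy to demonstrate. Here is a minimal,
self-contained sketch of it; the Writer class and its fields are
illustrative stand-ins, not part of gguf.py:

    class Writer:
        # Assignments at class scope create attributes on the class object
        # itself, not per-instance defaults.
        kv_data_count = 0
        tensors = []          # one list object, shared by every instance!

        def add_tensor(self, name):
            self.tensors.append(name)   # mutates the shared class-level list

    a = Writer()
    b = Writer()
    a.add_tensor("tok_embd.weight")
    print(b.tensors)                  # ['tok_embd.weight'] -- b sees a's data
    a.kv_data_count += 1              # only now does a get its own attribute
    print(Writer.kv_data_count, b.kv_data_count)   # still 0 0

GGUFWriter's old class-level values were immutable ints and bytes, so
reads simply fell through to the class until the first assignment rather
than leaking between instances, but the mutable case shows why stashing
per-writer state on the class is a trap; moving every default into
__init__ removes it.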
From: Cebtenzzre
Date: Sun, 1 Oct 2023 18:07:43 -0400
Subject: [PATCH 2/4] gguf : force use_temp_file to false

---
 convert-llama-ggml-to-gguf.py                  |  5 +----
 .../convert-finetune-checkpoint-to-gguf.py     |  2 +-
 .../convert-train-checkpoint-to-gguf.py        |  2 +-
 gguf-py/gguf/gguf.py                           | 38 ++++++--------------------------------
 4 files changed, 9 insertions(+), 38 deletions(-)

diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py
index b5d3e0b3c3acea..847ea7c69ac9d6 100755
--- a/convert-llama-ggml-to-gguf.py
+++ b/convert-llama-ggml-to-gguf.py
@@ -235,10 +235,7 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override
 
     def save(self):
         print('* Preparing to save GGUF file')
-        gguf_writer = gguf.GGUFWriter(
-            self.cfg.output,
-            gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
-            use_temp_file = False )
+        gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
         self.add_params(gguf_writer)
         self.add_vocab(gguf_writer)
         if self.special_vocab is not None:
diff --git a/examples/finetune/convert-finetune-checkpoint-to-gguf.py b/examples/finetune/convert-finetune-checkpoint-to-gguf.py
index 96d6633ed7d5ee..56d1d9ac06d500 100644
--- a/examples/finetune/convert-finetune-checkpoint-to-gguf.py
+++ b/examples/finetune/convert-finetune-checkpoint-to-gguf.py
@@ -475,7 +475,7 @@ def main():
     # we should have read all available data
     assert(offset == len(data))
 
-    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
     chk.save_gguf(gguf_writer)
     print("    gguf: write header")
     gguf_writer.write_header_to_file()
diff --git a/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py b/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
index 351e7bc2d2a95f..07a341e98c78ce 100644
--- a/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
+++ b/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
@@ -485,7 +485,7 @@ def main():
     # we should have read all available data
     assert(offset == len(data))
 
-    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
     chk.save_gguf(gguf_writer)
     print("    gguf: write header")
     gguf_writer.write_header_to_file()
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 6f30bb5c3faf89..2760ce79f01f15 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -464,10 +464,9 @@ def get_type(val):
 
 class GGUFWriter:
     fout: BufferedWriter
-    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
-    tensors: list[tuple[np.ndarray[Any, Any], int]]
+    tensors: list[np.ndarray[Any, Any]]
 
-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
+    def __init__(self, path: os.PathLike[str] | str, arch: str):
         self.fout = open(path, "wb")
         self.arch = arch
         self.offset_tensor = 0
@@ -476,8 +475,6 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
         self.kv_data_count = 0
         self.ti_data = b""
         self.ti_data_count = 0
-        self.use_temp_file = use_temp_file
-        self.temp_file = None
         self.tensors = []
         self.add_architecture()
 
@@ -621,24 +618,10 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         self.ti_data_count += 1
 
     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
-        if self.use_temp_file and self.temp_file is None:
-            fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
-            fp.seek(0)
-            self.temp_file = fp
-
         shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
         self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
 
-        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
-
-        if self.temp_file is None:
-            self.tensors.append((tensor, pad))
-            return
-
-        tensor.tofile(self.temp_file)
-
-        if pad != 0:
-            self.temp_file.write(bytes([0] * pad))
+        self.tensors.append(tensor)
 
     def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
         pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
@@ -655,18 +638,9 @@ def write_tensors_to_file(self):
 
         self.write_padding(self.fout, self.fout.tell())
 
-        if self.temp_file is None:
-            for (currtensor, currpad) in self.tensors:
-                currtensor.tofile(self.fout)
-                if currpad != 0:
-                    self.fout.write(bytes([0] * currpad))
-            return
-
-        self.temp_file.seek(0)
-
-        shutil.copyfileobj(self.temp_file, self.fout)
-        self.flush()
-        self.temp_file.close()
+        for tensor in self.tensors:
+            tensor.tofile(self.fout)
+            self.write_padding(self.fout, tensor.nbytes)
 
     def flush(self):
         self.fout.flush()
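With the temp-file staging gone, add_tensor just records a reference and
write_tensors_to_file pads each tensor on the way out. As a quick sanity
check of the alignment arithmetic used by write_padding and the removed
pad computation above, here is a standalone version; it assumes ggml_pad
is the usual round-up-to-a-multiple helper and uses the GGUF default
alignment of 32:

    def ggml_pad(x: int, n: int) -> int:
        # round x up to the next multiple of n
        return ((x + n - 1) // n) * n

    data_alignment = 32                 # GGUF_DEFAULT_ALIGNMENT
    nbytes = 100                        # e.g. a 25-element float32 tensor
    print(ggml_pad(nbytes, data_alignment))            # 128
    print(ggml_pad(nbytes, data_alignment) - nbytes)   # 28 zero bytes of padding

The cost of dropping SpooledTemporaryFile is that every array handed to
add_tensor now stays resident in memory until write_tensors_to_file runs;
patch 4 below mitigates that by releasing each tensor as soon as it has
been written.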
From 199724f00424449e8b4076d0b03c2d7717c056cf Mon Sep 17 00:00:00 2001
From: Cebtenzzre
Date: Sun, 1 Oct 2023 19:31:08 -0400
Subject: [PATCH 3/4] gguf : track writer state

---
 gguf-py/gguf/gguf.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 2760ce79f01f15..264ae264bb4cce 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -462,6 +462,13 @@ def get_type(val):
     sys.exit()
 
 
+class WriterState:
+    EMPTY   = auto()
+    HEADER  = auto()
+    KV_DATA = auto()
+    TI_DATA = auto()
+
+
 class GGUFWriter:
     fout: BufferedWriter
     tensors: list[np.ndarray[Any, Any]]
@@ -476,24 +483,37 @@ def __init__(self, path: os.PathLike[str] | str, arch: str):
         self.ti_data = b""
         self.ti_data_count = 0
         self.tensors = []
+        self.state = WriterState.EMPTY
         self.add_architecture()
 
     def write_header_to_file(self):
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
         self.fout.write(struct.pack("<I", GGUF_VERSION))
         self.fout.write(struct.pack("<Q", self.ti_data_count))
         self.fout.write(struct.pack("<Q", self.kv_data_count))
         self.flush()
+        self.state = WriterState.HEADER
 
     def write_kv_data_to_file(self):
+        if self.state is not WriterState.HEADER:
+            raise ValueError(f'Expected output file to contain the header, got {self.state}')
+
         self.fout.write(self.kv_data)
         self.flush()
+        self.state = WriterState.KV_DATA
 
     def write_ti_data_to_file(self):
+        if self.state is not WriterState.KV_DATA:
+            raise ValueError(f'Expected output file to contain KV data, got {self.state}')
+
         self.fout.write(self.ti_data)
         self.flush()
+        self.state = WriterState.TI_DATA

From: Cebtenzzre
Date: Sun, 1 Oct 2023 21:42:42 -0400
Subject: [PATCH 4/4] gguf : free tensors as they are written

---
 gguf-py/gguf/gguf.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 264ae264bb4cce..ce015cc8694043 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -661,7 +661,11 @@ def write_tensors_to_file(self):
 
         self.write_padding(self.fout, self.fout.tell())
 
-        for tensor in self.tensors:
+        while True:
+            try:
+                tensor = self.tensors.pop(0)
+            except IndexError:
+                break
             tensor.tofile(self.fout)
             self.write_padding(self.fout, tensor.nbytes)
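A final note on the last patch: the while/pop loop looks roundabout
compared with the old for loop, but the difference is what stays
reachable. Iterating keeps every array referenced by self.tensors until
the method returns, while pop(0) makes the list give up its reference
before the write, so each buffer becomes collectable as soon as the loop
moves on. A standalone sketch of the same idea, with a made-up file name
and tensor sizes:

    import numpy as np

    tensors = [np.zeros((1024, 1024), dtype=np.float32) for _ in range(4)]

    with open("demo.bin", "wb") as fout:
        while True:
            try:
                tensor = tensors.pop(0)   # the list drops its reference here
            except IndexError:
                break
            tensor.tofile(fout)
            # 'tensor' is rebound on the next iteration (or dropped when the
            # enclosing scope ends), so each 4 MiB buffer can be reclaimed
            # instead of all four surviving until the loop finishes.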