ngxson · dnhkng · Mar 9, 2024 · Mar 10, 2024
diff --git a/examples/merge/linear.yml b/examples/merge/linear.yml
@@ -0,0 +1,8 @@
+models:  # whole-model merge: yamlconfig.py applies merge_method to every layer of these models
+  - model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
+    parameters:
+      weight: 0.7
+  - model: models/zephyr-7b-beta.Q5_K_M.gguf
+    parameters:
+      weight: 0.3
+merge_method: slerp  # NOTE(review): this file is named linear.yml but selects slerp - confirm which is intended
diff --git a/examples/merge/slices.yml b/examples/merge/slices.yml
@@ -0,0 +1,33 @@
+slices:  # slice merge: slices are processed in order and output layers are numbered from 0
+- sources:
+  - layer_range: [0, 1]  # end is exclusive (expanded with range()), so this is layer 0 only
+    model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
+    weight: 0.6
+  - layer_range: [0, 1]
+    model: models/zephyr-7b-beta.Q5_K_M.gguf
+    weight: 0.4
+  merge_method: slerp
+- sources:
+  - layer_range: [1, 16]
+    model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
+    weight: 0.2
+  - layer_range: [3, 18]  # may start elsewhere, but must span as many layers as [1, 16]
+    model: models/zephyr-7b-beta.Q5_K_M.gguf
+    weight: 0.8
+  merge_method: linear
+- sources:
+  - layer_range: [8, 24]
+    model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
+  merge_method: copy  # copy is accepted only with exactly one source; no weight is read
+- sources:
+  - layer_range: [16, 30]
+    model: models/zephyr-7b-beta.Q5_K_M.gguf
+  merge_method: copy
+- sources:
+  - layer_range: [31, 32]
+    model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
+    weight: 0.1
+  - layer_range: [31, 32]
+    model: models/zephyr-7b-beta.Q5_K_M.gguf
+    weight: 0.9
+  merge_method: linear
diff --git a/examples/merge/yamlconfig.py b/examples/merge/yamlconfig.py
@@ -0,0 +1,321 @@
+import yaml
+from gguf.gguf_reader import GGUFReader
+import argparse
+
+argparser = argparse.ArgumentParser(description="Merge gguf files")
+argparser.add_argument("config", help="The config file to use for merging")
+args = argparser.parse_args()  # NOTE: runs at module import time, outside the __main__ guard
+# Path of the YAML merge instructions; opened by the __main__ block.
+configFile = args.config
+
+
+def checkMergeMode(mergeInstructions):
+    """
+    Return the merge mode: the first top-level key named 'models' or 'slices'.
+
+    Other keys (e.g. 'merge_method') may precede it; YAML key order is not significant.
+
+    Parameters
+    ----------
+    mergeInstructions : dict
+        The parsed merge instructions
+
+    Returns
+    -------
+    str
+        "models" or "slices"; ValueError is raised when neither key exists
+        or when the config is empty or not a mapping
+    """
+    keys = mergeInstructions if isinstance(mergeInstructions, dict) else ()
+    for key in keys:
+        if key in ("models", "slices"):
+            return key  # first matching key wins
+    raise ValueError('Merge instructions must contain either "models" or "slices" key')
+
+
+def getModelLength(model):
+    """
+    Load the gguf file and return its number of layers (blocks).
+    Prefers "llama.block_count", else the first "<arch>.block_count" key.
+
+    Parameters
+    ----------
+    model : str
+        Path of the gguf file to load
+
+    Returns
+    -------
+    int
+        The block count stored in the file's metadata (KeyError if absent)
+    """
+    reader = GGUFReader(model)
+    key = "llama.block_count"
+    if key not in reader.fields:  # other architectures store "<arch>.block_count"
+        key = next((k for k in reader.fields if k.endswith(".block_count")), key)
+    field = reader.fields[key]
+    return int(field.parts[field.data[0]][0])
+
+
+def checkModelsLengths(models):
+    """
+    Check that all models have the same length
+
+    Parameters
+    ----------
+    models : list
+        A non-empty list of model names
+
+    Returns
+    -------
+    int
+        The shared length; ValueError if the list is empty or lengths differ
+    """
+    modelLengths = {getModelLength(model) for model in models}
+    if len(modelLengths) != 1:  # empty input would otherwise hit a bare KeyError in pop()
+        problem = "All models must have the same length" if modelLengths else "No models given"
+        raise ValueError(problem)
+    return modelLengths.pop()
+
+
+def getModelWeights(mergeInstructions):
+    """
+    Collect the weight of every source, in the order the slices list them
+
+    Parameters
+    ----------
+    mergeInstructions : dict
+        The merge instructions; must contain a "slices" list
+
+    Returns
+    -------
+    list
+        One weight per source (KeyError if a source has no "weight" entry)
+    """
+
+    # Flattened walk: slice by slice, source by source.
+    collected = []
+    for sliceSpec in mergeInstructions["slices"]:
+        for source in sliceSpec["sources"]:
+            collected.append(source["weight"])
+    return collected
+
+
+def getModelNames(mergeInstructions):
+    """
+    Map every model used by the slices to a stable index
+
+    Indices follow the order in which models first appear in the slices, so the
+    mapping (and the model order in the generated command) is identical on
+    every run. A plain set would be ordered by randomized string hashes.
+
+    Parameters
+    ----------
+    mergeInstructions : dict
+        The merge instructions; must contain a "slices" list
+
+    Returns
+    -------
+    dict
+        A dict of model names to their index
+    """
+    orderedNames = dict.fromkeys(
+        source["model"]
+        for item in mergeInstructions["slices"]
+        for source in item["sources"]
+    )
+    return {name: index for index, name in enumerate(orderedNames)}
+
+
+def mergeModels(mergeInstructions):
+    """
+    Write config.txt for a whole-model merge
+
+    For every layer i, one entry pairs layer i of each model with the merge
+    method and the per-model weights. An existing config.txt is overwritten.
+
+    Parameters
+    ----------
+    mergeInstructions : dict
+        The merge instructions, with "models" and "merge_method" keys
+
+    Returns
+    -------
+    None
+    """
+    models = mergeInstructions["models"]
+    modelNames = [model["model"] for model in models]
+    modelWeights = ",".join(str(model["parameters"]["weight"]) for model in models)
+    method = mergeInstructions["merge_method"]  # looked up once, before any output is written
+    modelLength = checkModelsLengths(modelNames)
+    # "w", not "a": a rerun must replace the previous config instead of appending to it
+    with open("config.txt", "w") as config:
+        config.write("# GGUF Merge Config\n")
+        config.write(
+            "# This autogenerated config is used to merge the GGUF models with this command:\n"
+        )
+        config.write(
+            f"# gguf_merge -c config.txt {' '.join(modelNames)} -o OUTPUT.gguf\n\n"
+        )
+        for i in range(modelLength):
+            # every model contributes its own layer i, followed by the weights
+            layerIds = ",".join([str(i)] * len(modelNames))
+            config.write(f"output layer {i}\n")
+            config.write(f"all {method} {layerIds},{modelWeights}\n\n")
+
+
+def getSliceLength(souces):
+    """
+    Return the layer ranges of a slice after checking they are equally long
+
+    Parameters
+    ----------
+    souces : dict
+        One slice of the merge instructions, holding a "sources" list
+
+    Returns
+    -------
+    list
+        The "layer_range" entry of every source, in order
+        (ValueError if the ranges do not all span the same number of layers)
+    """
+    ranges = [entry["layer_range"] for entry in souces["sources"]]
+    # A span is end - start; the set collapses equal spans into one element.
+    distinctSpans = {bounds[1] - bounds[0] for bounds in ranges}
+    if len(distinctSpans) <= 1:
+        return ranges
+    raise ValueError("All sources must have the same length")
+
+
+def processFusion(config, layerIndex, slice, method, layerList):
+    """
+    Write the config entries that fuse the sources of one slice layer by layer
+
+    The layer ranges of all sources are walked in lockstep; each step emits one
+    output layer listing the source layer of every model followed by the weights.
+
+    Parameters
+    ----------
+    config : file
+        The config file to write to
+    layerIndex : int
+        The index of the first output layer to write
+    slice : dict
+        The slice to process; every source needs a "weight"
+    method : str
+        The method to use for the fusion
+    layerList : list
+        The layer range of every source, in source order
+
+    Returns
+    -------
+    int
+        The index of the next free output layer
+    """
+    # Weights are the same for every layer of the slice, so format them once.
+    weights = ",".join(str(source["weight"]) for source in slice["sources"])
+    # One range per source, walked in lockstep; works for any number of sources.
+    for layers in zip(*(range(*layerRange) for layerRange in layerList)):
+        config.write(f"output layer {layerIndex}\n")
+        config.write(f"all {method} {','.join(map(str, layers))},{weights}\n\n")
+        layerIndex += 1
+    return layerIndex
+
+
+def processCopy(modelNames, config, layerIndex, slice, method, layerList):
+    """
+    Write the config entries that copy a layer range from a single model
+
+    Only the first source of the slice and the first layer range are used.
+
+    Parameters
+    ----------
+    modelNames : dict
+        Maps each model name to its index
+    config : file
+        The config file to write to
+    layerIndex : int
+        The index of the first output layer to write
+    slice : dict
+        The slice to process
+    method : str
+        The method name written into each entry
+    layerList : list
+        The layer ranges; layerList[0] is the range that gets copied
+
+    Returns
+    -------
+    int
+        The index of the next free output layer
+    """
+    nextLayer = layerIndex
+    for sourceLayer in range(*layerList[0]):
+        config.write(f"output layer {nextLayer}\n")
+        modelId = modelNames[slice["sources"][0]["model"]]
+        config.write(f"all {method} {modelId},{sourceLayer}\n\n")
+        nextLayer += 1
+    return nextLayer
+
+
+def mergeSlices(mergeInstructions):
+    """
+    Write config.txt for a slice-based merge
+
+    Slices are handled in order; output layers are numbered consecutively from 0.
+
+    Parameters
+    ----------
+    mergeInstructions : dict
+        The merge instructions, with a "slices" list
+
+    Returns
+    -------
+    None (ValueError for an unsupported merge method / source count)
+    """
+    modelNames = getModelNames(mergeInstructions)
+    with open("config.txt", "w") as config:  # "w": a rerun replaces the old config instead of appending
+        config.write("# GGUF Merge Config\n")
+        config.write(
+            "# This autogenerated config is used to merge the GGUF models with this command:\n"
+        )
+        config.write(
+            f"# gguf_merge -c config.txt {' '.join(modelNames)} -o OUTPUT.gguf\n\n"
+        )
+        layerIndex = 0
+        for sliceSpec in mergeInstructions["slices"]:
+            method = sliceSpec["merge_method"]
+            layerList = getSliceLength(sliceSpec)
+            if method == "copy" and len(sliceSpec["sources"]) == 1:
+                layerIndex = processCopy(modelNames, config, layerIndex, sliceSpec, method, layerList)
+            elif method in ["linear", "slerp"] and len(sliceSpec["sources"]) > 1:
+                layerIndex = processFusion(config, layerIndex, sliceSpec, method, layerList)
+            else:
+                raise ValueError(
+                    "Invalid merge method. Must be 'copy' or 'linear' or 'slerp', and the correct number of sources for the method."
+                )
+
+
+def mergeGGUF(mergeInstructions):
+    """
+    Generate the merge config for the given instructions
+
+    Dispatches to mergeModels for the "models" mode and to mergeSlices otherwise.
+
+    Parameters
+    ----------
+    mergeInstructions : dict
+        The merge instructions
+
+    Returns
+    -------
+    None
+    """
+    mode = checkMergeMode(mergeInstructions)
+    merge = mergeModels if mode == "models" else mergeSlices
+    merge(mergeInstructions)
+
+
+if __name__ == "__main__":
+    # safe_load suffices for this plain-data config and never builds arbitrary Python objects
+    with open(configFile, "r") as f:
+        mergeInstructions = yaml.safe_load(f)
+    mergeGGUF(mergeInstructions)  # runs once the config file has been closed