Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

YAML Parser #3

Open
wants to merge 2 commits into
base: xsn/model_merge
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions examples/merge/linear.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
models:
  - model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
    parameters:
      weight: 0.7
  - model: models/zephyr-7b-beta.Q5_K_M.gguf
    parameters:
      weight: 0.3
merge_method: slerp
33 changes: 33 additions & 0 deletions examples/merge/slices.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
slices:
  - sources:
      - layer_range: [0, 1]
        model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
        weight: 0.6
      - layer_range: [0, 1]
        model: models/zephyr-7b-beta.Q5_K_M.gguf
        weight: 0.4
    merge_method: slerp
  - sources:
      - layer_range: [1, 16]
        model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
        weight: 0.2
      - layer_range: [3, 18]
        model: models/zephyr-7b-beta.Q5_K_M.gguf
        weight: 0.8
    merge_method: linear
  - sources:
      - layer_range: [8, 24]
        model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
    merge_method: copy
  - sources:
      - layer_range: [16, 30]
        model: models/zephyr-7b-beta.Q5_K_M.gguf
    merge_method: copy
  - sources:
      - layer_range: [31, 32]
        model: models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
        weight: 0.1
      - layer_range: [31, 32]
        model: models/zephyr-7b-beta.Q5_K_M.gguf
        weight: 0.9
    merge_method: linear
321 changes: 321 additions & 0 deletions examples/merge/yamlconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
import yaml
from gguf.gguf_reader import GGUFReader
import argparse

# Command-line interface: one positional argument naming the merge config file.
argparser = argparse.ArgumentParser(description="Merge gguf files")
argparser.add_argument("config", help="The config file to use for merging")
# NOTE: arguments are parsed at module level, so this runs whenever the module
# is loaded, not only when the file is executed as a script.
args = argparser.parse_args()

# Path of the merge config file; opened by the __main__ block of this file.
configFile = args.config


def checkMergeMode(mergeInstructions):
    """
    Determine the merge mode, either 'models' or 'slices'.

    The mode is the first key of the merge instructions that is either
    "models" or "slices". Other keys (for example "merge_method") may appear
    before it, so the result does not depend on where the mode key sits in
    the YAML file.

    Parameters
    ----------
    mergeInstructions : dict
        The merge instructions

    Returns
    -------
    str
        The merge mode, "models" or "slices"

    Raises
    ------
    ValueError
        If the merge instructions are not a mapping (e.g. an empty YAML file
        loads as None) or contain neither a "models" nor a "slices" key
    """
    validModes = ("models", "slices")
    # Anything that is not a mapping has no keys to inspect.
    keys = mergeInstructions if isinstance(mergeInstructions, dict) else ()
    for key in keys:
        if key in validModes:
            return key

    raise ValueError(
        'Merge instructions must contain either "models" or "slices" key'
    )


def getModelLength(model):
    """
    Read the number of blocks (the "length") of a gguf model.

    The value is taken from the "llama.block_count" metadata field, so only
    models that expose that exact field are supported.

    Parameters
    ----------
    model : str
        Path of the gguf file to open

    Returns
    -------
    int
        The block count stored in the file
    """
    blockCountField = GGUFReader(model).fields["llama.block_count"]
    # data[0] is used as the index into parts at which the value is stored.
    valueIndex = blockCountField.data[0]
    return int(blockCountField.parts[valueIndex][0])


def checkModelsLengths(models):
    """
    Check that all models have the same length

    Parameters
    ----------
    models : list
        A list of model names

    Returns
    -------
    int
        The length of the models if they all have the same length

    Raises
    ------
    ValueError
        If no models are given, or if the models do not all have the same
        length
    """
    modelLengths = {getModelLength(model) for model in models}
    # Without this guard an empty list would reach set.pop() and fail with an
    # unhelpful "pop from an empty set" KeyError.
    if not modelLengths:
        raise ValueError("At least one model is required")
    if len(modelLengths) > 1:
        raise ValueError("All models must have the same length")

    return modelLengths.pop()


def getModelWeights(mergeInstructions):
    """
    Get the source weights from the merge instructions

    The weights are collected slice by slice, in the order the sources are
    listed. A source without a "weight" key (as in a "copy" slice)
    contributes None, so the result stays aligned with the flattened list of
    sources.

    Parameters
    ----------
    mergeInstructions : dict
        The merge instructions; must contain a "slices" key

    Returns
    -------
    list
        One entry per source: its weight, or None when no weight is given
    """

    modelWeights = [
        source.get("weight")
        for item in mergeInstructions["slices"]
        for source in item["sources"]
    ]
    return modelWeights


def getModelNames(mergeInstructions):
    """
    Get the model names from the merge instructions

    Each distinct model is assigned an index in order of first appearance
    across the slices, so the mapping is the same on every run. (Iterating a
    set of strings would give an order that can change between interpreter
    runs because of hash randomization.)

    Parameters
    ----------
    mergeInstructions : dict
        The merge instructions; must contain a "slices" key

    Returns
    -------
    dict
        A dict of model names to their index; iteration order follows the
        index
    """

    # dict.fromkeys removes duplicates while keeping first-appearance order.
    uniqueNames = dict.fromkeys(
        source["model"]
        for item in mergeInstructions["slices"]
        for source in item["sources"]
    )
    namesDict = {name: i for i, name in enumerate(uniqueNames)}
    return namesDict


def mergeModels(mergeInstructions):
    """
    Write the merge config for a whole-model merge

    Every layer of the output is produced by applying the configured merge
    method to the same layer of each listed model, using each model's weight.
    The result is written to "config.txt" in the current directory, replacing
    any previous content so that re-running the script does not append a
    second config to the file.

    Parameters
    ----------
    mergeInstructions : dict
        The merge instructions; must contain "models" (each entry with a
        "model" name and a "parameters" -> "weight" value) and "merge_method"

    Returns
    -------
    None
    """

    models = mergeInstructions["models"]
    modelNames = [model["model"] for model in models]
    modelWeights = [str(model["parameters"]["weight"]) for model in models]
    # Read before the file is opened so a missing key cannot leave a
    # half-written config behind.
    mergeMethod = mergeInstructions["merge_method"]
    numModels = len(modelNames)
    modelLength = checkModelsLengths(modelNames)
    weightColumns = ",".join(modelWeights)

    with open("config.txt", "w") as config:
        config.write("# GGUF Merge Config\n")
        config.write(
            "# This autogenerated config is used to merge the GGUF models with this command:\n"
        )
        config.write(
            f"# gguf_merge -c config.txt {' '.join(modelNames)} -o OUTPUT.gguf\n\n"
        )

        for i in range(modelLength):
            # The same layer index is repeated once per model, followed by
            # the weights in the same model order.
            layerColumns = ",".join([str(i)] * numModels)
            config.write(f"output layer {i}\n")
            config.write(f"all {mergeMethod} {layerColumns},{weightColumns}\n")
            config.write("\n")


def getSliceLength(souces):
    """
    Collect the layer ranges of a slice and verify they span equally many layers

    Parameters
    ----------
    souces : dict
        A single slice; its "sources" entries each carry a "layer_range"

    Returns
    -------
    list
        The "layer_range" value of every source, in source order

    Raises
    ------
    ValueError
        If the ranges do not all cover the same number of layers
    """
    collectedRanges = []
    for entry in souces["sources"]:
        collectedRanges.append(entry["layer_range"])

    # One distinct span (or none, for an empty slice) means the ranges agree.
    distinctSpans = {bounds[1] - bounds[0] for bounds in collectedRanges}
    if len(distinctSpans) <= 1:
        return collectedRanges
    raise ValueError("All sources must have the same length")


def processFusion(config, layerIndex, slice, method, layerList):
    """
    Write the config entries that fuse the sources of a slice

    The layer ranges are walked in lockstep; for every step one output layer
    is written, listing the current layer of each source followed by the
    weight of each source. Any number of sources is supported: the entry
    lists one layer index per range in layerList.

    Parameters
    ----------
    config : file
        The config file to write to
    layerIndex : int
        The index of the first output layer to write
    slice : dict
        The slice to process; every source must have a "weight"
    method : str
        The method to use for the fusion
    layerList : list
        The layer ranges, one [start, stop] pair per source

    Returns
    -------
    int
        The index of the next output layer, i.e. layerIndex plus the number
        of layers written
    """

    sourceRanges = [range(*layerRange) for layerRange in layerList]
    for sourceLayers in zip(*sourceRanges):
        layerColumns = ",".join(str(layer) for layer in sourceLayers)
        weightColumns = ",".join(
            str(source["weight"]) for source in slice["sources"]
        )
        config.write(f"output layer {layerIndex}\n")
        config.write(f"all {method} {layerColumns},{weightColumns}\n\n")
        layerIndex += 1

    return layerIndex


def processCopy(modelNames, config, layerIndex, slice, method, layerList):
    """
    Write the config entries that copy layers from a single source

    One output layer is written for every layer in the first layer range,
    each referring to the index of the slice's first source model.

    Parameters
    ----------
    modelNames : dict
        Mapping of model name to model index
    config : file
        The config file to write to
    layerIndex : int
        The index of the first output layer to write
    slice : dict
        The slice to process; only its first source is used
    method : str
        The method name written into each entry
    layerList : list
        The layer ranges; only the first one is used

    Returns
    -------
    int
        The index of the next output layer
    """

    nextOutputLayer = layerIndex
    for sourceLayer in range(*layerList[0]):
        config.write(f"output layer {nextOutputLayer}\n")
        sourceModel = modelNames[slice["sources"][0]["model"]]
        # The entry line and the blank separator line are written together.
        config.write(f"all {method} {sourceModel},{sourceLayer}\n\n")
        nextOutputLayer += 1

    return nextOutputLayer


def mergeSlices(mergeInstructions):
    """
    Write the merge config for a slice-based merge

    Slices are processed in order and their output layers are numbered
    consecutively from 0. A "copy" slice must have exactly one source; a
    "linear" or "slerp" slice must have more than one. The result is written
    to "config.txt" in the current directory, replacing any previous content
    so that re-running the script does not append a second config to the
    file.

    Parameters
    ----------
    mergeInstructions : dict
        The merge instructions; must contain a "slices" key

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a slice has an unknown merge method or the wrong number of sources
        for its method
    """
    modelNames = getModelNames(mergeInstructions)

    with open("config.txt", "w") as config:
        config.write("# GGUF Merge Config\n")
        config.write(
            "# This autogenerated config is used to merge the GGUF models with this command:\n"
        )
        config.write(
            f"# gguf_merge -c config.txt {' '.join(modelNames)} -o OUTPUT.gguf\n\n"
        )

        layerIndex = 0
        for sliceSpec in mergeInstructions["slices"]:
            method = sliceSpec["merge_method"]
            layerList = getSliceLength(sliceSpec)
            numSources = len(sliceSpec["sources"])
            if method == "copy" and numSources == 1:
                layerIndex = processCopy(
                    modelNames, config, layerIndex, sliceSpec, method, layerList
                )
            elif method in ["linear", "slerp"] and numSources > 1:
                layerIndex = processFusion(
                    config, layerIndex, sliceSpec, method, layerList
                )
            else:
                raise ValueError(
                    "Invalid merge method. Must be 'copy' or 'linear' or 'slerp', and the correct number of sources for the method."
                )


def mergeGGUF(mergeInstructions):
    """
    Generate the merge config that matches the mode of the merge instructions

    Parameters
    ----------
    mergeInstructions : dict
        The merge instructions

    Returns
    -------
    None
    """
    # checkMergeMode only ever returns "models" or "slices".
    handler = mergeModels if checkMergeMode(mergeInstructions) == "models" else mergeSlices
    handler(mergeInstructions)


if __name__ == "__main__":
    # load the merge instructions
    # NOTE(security): this used yaml.load(..., Loader=yaml.FullLoader). The
    # config is a user-supplied file made only of plain mappings, lists and
    # scalars, so the safe loader is sufficient and refuses to construct
    # arbitrary Python objects.
    with open(configFile, "r") as f:
        mergeInstructions = yaml.safe_load(f)
    mergeGGUF(mergeInstructions)