diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py index 638fd9ed..ecbb571c 100644 --- a/gptqmodel/models/auto.py +++ b/gptqmodel/models/auto.py @@ -76,7 +76,7 @@ from .definitions.opt import OPTGPTQ # noqa: E402 from .definitions.ovis import OvisGPTQ # noqa: E402 from .definitions.phi import PhiGPTQ # noqa: E402 -from .definitions.phi3 import Phi3GPTQ # noqa: E402 +from .definitions.phi3 import Phi3GPTQ, PhiMoEGPTQForCausalLM # noqa: E402 from .definitions.qwen import QwenGPTQ # noqa: E402 from .definitions.qwen2 import Qwen2GPTQ # noqa: E402 from .definitions.qwen2_moe import Qwen2MoeGPTQ # noqa: E402 @@ -124,6 +124,7 @@ "gemma2": Gemma2GPTQ, "phi": PhiGPTQ, "phi3": Phi3GPTQ, + "phimoe": PhiMoEGPTQForCausalLM, "mpt": MPTGPTQ, "minicpm": MiniCPMGPTQ, "minicpm3":MiniCPM3GPTQ, diff --git a/gptqmodel/models/definitions/phi3.py b/gptqmodel/models/definitions/phi3.py index 1e7091ff..b0818db5 100644 --- a/gptqmodel/models/definitions/phi3.py +++ b/gptqmodel/models/definitions/phi3.py @@ -29,6 +29,8 @@ class Phi3GPTQ(BaseGPTQModel): ] class PhiMoEGPTQForCausalLM(BaseGPTQModel): + require_pkgs_version = ["transformers<=4.44.2"] + layer_type = "PhiMoEDecoderLayer" layers_block_name = "model.layers" base_modules = ["model.embed_tokens", "model.norm"]