
Commit 5c4a908
triton backend requires <3.0 (#348)
wenhuach21 authored Nov 27, 2024
1 parent f0546d3 commit 5c4a908
Showing 4 changed files with 5 additions and 6 deletions.
auto_round/backend.py (5 changes: 3 additions & 2 deletions)
@@ -98,6 +98,7 @@ def feature_num_greater_checker(in_feature, out_feature, num):
     packing_format="triton",
     bits=[2, 4, 8], group_size=None,
     priority=0, feature_checks=[feature_multiply_checker_32],
+    requirements=["triton<3.0,>=2.0"]
     )

 BackendInfos['gptq:exllamav2'] = BackendInfo(device=["cuda"], sym=[True, False],
@@ -116,13 +117,13 @@ def feature_num_greater_checker(in_feature, out_feature, num):
     priority=0, feature_checks=[feature_multiply_checker_32],
     alias=["auto_round:gptq:tritonv2", "auto_round:auto_gptq:tritonv2",
            "auto_gptq:tritonv2"],
-    requirements=["auto-gptq>=0.7.1"]
+    requirements=["auto-gptq>=0.7.1","triton<3.0,>=2.0"]
     )

 BackendInfos['gptq:cuda'] = BackendInfo(device=["cuda"], sym=[True, False],
     packing_format="triton_zp+-1",
     bits=[2, 3, 4, 8], group_size=None,
-    priority=0, feature_checks=[feature_multiply_checker_32],
+    priority=1, feature_checks=[feature_multiply_checker_32],
     alias=["auto_round:auto_gptq:cuda,auto_gptq:cuda, auto_round:gptq:cuda"],
     convertable_format=["triton_zp+-1"],
     requirements=["auto-gptq>=0.7.1"]
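The new requirements entries pin triton to the 2.x series for the triton-packed backends. Below is a minimal sketch of how such a PEP 508 spec could be checked against the installed package; the helper name and the use of the packaging library are assumptions for illustration, not auto-round's actual dispatch code.

# Hypothetical sketch: validate a backend requirement string such as
# "triton<3.0,>=2.0" before selecting that backend.
from importlib.metadata import PackageNotFoundError, version

from packaging.requirements import Requirement


def requirement_satisfied(spec: str) -> bool:
    # Parse the PEP 508 spec, e.g. name "triton", specifier "<3.0,>=2.0".
    req = Requirement(spec)
    try:
        installed = version(req.name)
    except PackageNotFoundError:
        return False  # package not installed at all
    # SpecifierSet membership accepts a version string directly.
    return installed in req.specifier


print(requirement_satisfied("triton<3.0,>=2.0"))  # False under triton 3.x

Under this scheme, a backend whose requirements are not met would simply be skipped in favor of a compatible one, which is consistent with the priority bump given to gptq:cuda above.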
auto_round/mllm/autoround_mllm.py (4 changes: 2 additions & 2 deletions)
@@ -172,15 +172,15 @@ def __init__(
                 f"rest batch_size({batch_size}) to 1 and "
                 f"gradient_accumulate_steps({gradient_accumulate_steps}) "
                 f"to {batch_size * gradient_accumulate_steps}, "
-                f"cause batch_size={batch_size} cannot be used for {dataset}")
+                f"because batch_size={batch_size} cannot be used for {dataset}")
             gradient_accumulate_steps = batch_size * gradient_accumulate_steps
             batch_size = 1
         if quant_nontext_module and batch_size != 1:
             logger.warning(
                 f"rest batch_size({batch_size}) to 1 and "
                 f"gradient_accumulate_steps({gradient_accumulate_steps}) "
                 f"to {batch_size * gradient_accumulate_steps}, "
-                f"cause batch_size={batch_size} cannot be used for calibrating non-text modules.")
+                f"because batch_size={batch_size} cannot be used for calibrating non-text modules.")
             gradient_accumulate_steps = batch_size * gradient_accumulate_steps
             batch_size = 1
         seqlen = 2048 if seqlen is None else seqlen
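The reworded warnings describe a rebalancing that forces batch_size to 1 while keeping the effective batch size constant by folding it into gradient_accumulate_steps. A minimal illustration with hypothetical values:

# batch_size is forced to 1, but samples per optimizer step stay the same.
batch_size, gradient_accumulate_steps = 4, 2
effective = batch_size * gradient_accumulate_steps  # 8 samples per update

gradient_accumulate_steps = batch_size * gradient_accumulate_steps  # now 8
batch_size = 1
assert batch_size * gradient_accumulate_steps == effective  # still 8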
requirements-cpu.txt (1 change: 0 additions & 1 deletion)
@@ -4,7 +4,6 @@ py-cpuinfo
 sentencepiece
 torch
 transformers>=4.38
-triton
 numpy < 2.0
 threadpoolctl
 lm-eval>=0.4.2,<0.5
requirements.txt (1 change: 0 additions & 1 deletion)
@@ -4,7 +4,6 @@ py-cpuinfo
 sentencepiece
 torch
 transformers>=4.38
-triton
 numpy < 2.0
 threadpoolctl
 lm-eval>=0.4.2,<0.5
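With triton dropped from both requirements files, it is no longer installed unconditionally (presumably so CPU-only installs do not pull it in); the triton-based GPU backends now declare it themselves via the pinned spec above. Users of those backends can satisfy the pin manually, e.g. pip install "triton>=2.0,<3.0".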
