[pre-commit.ci] pre-commit autoupdate #1097

Open · wants to merge 2 commits into main
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
           - --fuzzy-match-generates-todo
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.1
+    rev: v0.9.6
     hooks:
       - id: ruff
         args: [--fix]
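The only hand-relevant change in this PR is the hook bump above: `ruff-pre-commit` moves from v0.8.1 to v0.9.6. Every hunk below appears to be mechanical reformatting produced by running the updated `ruff` formatter over the repository; none of them change runtime behavior.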
6 changes: 3 additions & 3 deletions src/distilabel/distiset.py
@@ -509,9 +509,9 @@ def load_from_disk(
         )
         dest_distiset_path = distiset_path
 
-        assert fs.isdir(
-            original_distiset_path
-        ), "`distiset_path` must be a `PathLike` object pointing to a folder or a URI of a remote filesystem."
+        assert fs.isdir(original_distiset_path), (
+            "`distiset_path` must be a `PathLike` object pointing to a folder or a URI of a remote filesystem."
+        )
 
         has_config = False
         has_artifacts = False
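This hunk, and most of those below, show the assert layout the newer ruff formatter produces: the condition stays on one line and the long message is wrapped in parentheses, instead of the condition itself being split across lines. A minimal runnable sketch of the pattern, using `os.path.isdir` as a hypothetical stand-in for the `fsspec` filesystem check in the real code:

```python
import os

original_distiset_path = "."  # hypothetical stand-in value

# ruff <= 0.8 split the condition across lines:
#     assert fs.isdir(
#         original_distiset_path
#     ), "long message..."
# ruff 0.9 keeps the condition intact and parenthesizes the message instead:
assert os.path.isdir(original_distiset_path), (
    "`distiset_path` must be a `PathLike` object pointing to a folder or a "
    "URI of a remote filesystem."
)
```

Either form evaluates identically; only the layout differs.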
2 changes: 1 addition & 1 deletion src/distilabel/steps/base.py
@@ -101,7 +101,7 @@ def _infer_step_name(
     base_name = "_".join(parts[:-1])
     while name in step_names:
         idx = int(name.split("_")[-1])
-        name = f"{base_name}_{idx+1}"
+        name = f"{base_name}_{idx + 1}"
     return name
 
 
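The `{idx+1}` → `{idx + 1}` change comes from the formatter now normalizing code inside f-string replacement fields, adding spaces around binary operators. A quick check, with illustrative values, that the rendered string is unchanged:

```python
base_name, idx = "step", 1

# Both spellings render the same text; the edit is purely cosmetic.
assert f"{base_name}_{idx+1}" == f"{base_name}_{idx + 1}" == "step_2"
```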
2 changes: 1 addition & 1 deletion src/distilabel/steps/tasks/math_shepherd/completer.py
@@ -485,7 +485,7 @@ def _auto_label(
                 self._logger.info("Completer failed due to empty completion")
                 continue
             if completion[-1] == golden_answers[instruction_i]:
-                label = f" { self.tags[0]}"
+                label = f" {self.tags[0]}"
                 # If we found one, it's enough as we are doing Hard Estimation
                 continue
             # In case we had no solutions from the previous step, otherwise we would have
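Same story here: the space after the opening brace in `f" { self.tags[0]}"` sits inside the replacement field, not in the literal text, so removing it does not change the rendered label. A tiny demonstration, using `"+"` as a hypothetical stand-in for `self.tags[0]`:

```python
tag = "+"  # hypothetical stand-in for self.tags[0]

# Whitespace around the expression inside {} is ignored by f-strings.
assert f" { tag}" == f" {tag}" == " +"
```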
12 changes: 6 additions & 6 deletions src/distilabel/utils/mkdocs/components_gallery.py
@@ -296,9 +296,9 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]:
             docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")
 
             if docstring["icon"]:
-                assert (
-                    docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
-                ), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                assert docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values(), (
+                    f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                )
 
             name = step["name"]
 
@@ -364,9 +364,9 @@ def _generate_tasks_pages(self, src_dir: Path, tasks: list) -> List[str]:
             first_category = docstring["categories"][0]
             docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")
             if docstring["icon"]:
-                assert (
-                    docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
-                ), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                assert docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values(), (
+                    f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                )
 
             name = task["name"]
 
42 changes: 21 additions & 21 deletions tests/unit/models/embeddings/test_llamacpp.py
@@ -115,9 +115,9 @@ def test_normalize_embeddings(self, test_inputs):
         for result in results:
             # Check if the embedding is normalized (L2 norm should be close to 1)
             norm = np.linalg.norm(result)
-            assert np.isclose(
-                norm, 1.0, atol=1e-6
-            ), f"Norm is {norm}, expected close to 1.0"
+            assert np.isclose(norm, 1.0, atol=1e-6), (
+                f"Norm is {norm}, expected close to 1.0"
+            )
 
     def test_normalize_embeddings_false(self, test_inputs):
         """
@@ -129,15 +129,15 @@ def test_normalize_embeddings_false(self, test_inputs):
         for result in results:
             # Check if the embedding is not normalized (L2 norm should not be close to 1)
             norm = np.linalg.norm(result)
-            assert not np.isclose(
-                norm, 1.0, atol=1e-6
-            ), f"Norm is {norm}, expected not close to 1.0"
+            assert not np.isclose(norm, 1.0, atol=1e-6), (
+                f"Norm is {norm}, expected not close to 1.0"
+            )
 
         # Additional check: ensure that at least one embedding has a norm significantly different from 1
         norms = [np.linalg.norm(result) for result in results]
-        assert any(
-            not np.isclose(norm, 1.0, atol=0.1) for norm in norms
-        ), "Expected at least one embedding with norm not close to 1.0"
+        assert any(not np.isclose(norm, 1.0, atol=0.1) for norm in norms), (
+            "Expected at least one embedding with norm not close to 1.0"
+        )
 
     def test_encode_batch(self) -> None:
         """
@@ -149,20 +149,20 @@ def test_encode_batch(self) -> None:
         inputs = [f"This is test sentence {i}" for i in range(batch_size)]
         results = self.embeddings.encode(inputs=inputs)
 
-        assert (
-            len(results) == batch_size
-        ), f"Expected {batch_size} results, got {len(results)}"
+        assert len(results) == batch_size, (
+            f"Expected {batch_size} results, got {len(results)}"
+        )
         for result in results:
-            assert (
-                len(result) == 384
-            ), f"Expected embedding dimension 384, got {len(result)}"
+            assert len(result) == 384, (
+                f"Expected embedding dimension 384, got {len(result)}"
+            )
 
         # Test with a large batch to ensure it doesn't cause issues
         large_batch = ["Large batch test" for _ in range(100)]
        large_results = self.embeddings.encode(inputs=large_batch)
-        assert (
-            len(large_results) == 100
-        ), f"Expected 100 results for large batch, got {len(large_results)}"
+        assert len(large_results) == 100, (
+            f"Expected 100 results for large batch, got {len(large_results)}"
+        )
 
     def test_encode_batch_consistency(self) -> None:
         """
@@ -180,6 +180,6 @@ def test_encode_batch_consistency(self) -> None:
         batch_result = self.embeddings.encode([input_text, "Another sentence"])[0]
 
         # Compare the embeddings
-        assert np.allclose(
-            single_result, batch_result, atol=1e-5
-        ), "Embeddings are not consistent between single and batch processing"
+        assert np.allclose(single_result, batch_result, atol=1e-5), (
+            "Embeddings are not consistent between single and batch processing"
+        )
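For readers outside the test suite: the property these tests assert is that a normalized embedding has an L2 norm of roughly 1.0. A self-contained sketch of that check (vector values invented for illustration), written in the same parenthesized-message style the formatter now produces:

```python
import numpy as np

embedding = np.array([3.0, 4.0])  # made-up vector; its L2 norm is 5.0
normalized = embedding / np.linalg.norm(embedding)

norm = np.linalg.norm(normalized)
assert np.isclose(norm, 1.0, atol=1e-6), (
    f"Norm is {norm}, expected close to 1.0"
)
```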