[pre-commit.ci] pre-commit autoupdate #1097

Open · wants to merge 2 commits into main
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
           - --fuzzy-match-generates-todo
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.1
+    rev: v0.9.6
     hooks:
       - id: ruff
         args: [--fix]
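The only hand-relevant change in this PR is the hook bump above: `ruff-pre-commit` moves from v0.8.1 to v0.9.6. Every hunk below appears to be mechanical reformatting produced by running the updated `ruff` formatter over the repository; none of them change runtime behavior.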
6 changes: 3 additions & 3 deletions src/distilabel/distiset.py
@@ -509,9 +509,9 @@ def load_from_disk(
         )
         dest_distiset_path = distiset_path
 
-        assert fs.isdir(
-            original_distiset_path
-        ), "`distiset_path` must be a `PathLike` object pointing to a folder or a URI of a remote filesystem."
+        assert fs.isdir(original_distiset_path), (
+            "`distiset_path` must be a `PathLike` object pointing to a folder or a URI of a remote filesystem."
+        )
 
         has_config = False
         has_artifacts = False
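This hunk, and most of those below, show the assert layout the newer ruff formatter produces: the condition stays on one line and the long message is wrapped in parentheses, instead of the condition itself being split across lines. A minimal runnable sketch of the pattern, using `os.path.isdir` as a hypothetical stand-in for the `fsspec` filesystem check in the real code:

```python
import os

original_distiset_path = "."  # hypothetical stand-in value

# ruff <= 0.8 split the condition across lines:
#     assert fs.isdir(
#         original_distiset_path
#     ), "long message..."
# ruff 0.9 keeps the condition intact and parenthesizes the message instead:
assert os.path.isdir(original_distiset_path), (
    "`distiset_path` must be a `PathLike` object pointing to a folder or a "
    "URI of a remote filesystem."
)
```

Either form evaluates identically; only the layout differs.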
2 changes: 1 addition & 1 deletion src/distilabel/steps/base.py
@@ -101,7 +101,7 @@ def _infer_step_name(
     base_name = "_".join(parts[:-1])
     while name in step_names:
         idx = int(name.split("_")[-1])
-        name = f"{base_name}_{idx+1}"
+        name = f"{base_name}_{idx + 1}"
     return name
 
 
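The `{idx+1}` → `{idx + 1}` change comes from the formatter now normalizing code inside f-string replacement fields, adding spaces around binary operators. A quick check, with illustrative values, that the rendered string is unchanged:

```python
base_name, idx = "step", 1

# Both spellings render the same text; the edit is purely cosmetic.
assert f"{base_name}_{idx+1}" == f"{base_name}_{idx + 1}" == "step_2"
```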
2 changes: 1 addition & 1 deletion src/distilabel/steps/tasks/math_shepherd/completer.py
@@ -485,7 +485,7 @@ def _auto_label(
                 self._logger.info("Completer failed due to empty completion")
                 continue
             if completion[-1] == golden_answers[instruction_i]:
-                label = f" { self.tags[0]}"
+                label = f" {self.tags[0]}"
                 # If we found one, it's enough as we are doing Hard Estimation
                 continue
             # In case we had no solutions from the previous step, otherwise we would have
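Same story here: the space after the opening brace in `f" { self.tags[0]}"` sits inside the replacement field, not in the literal text, so removing it does not change the rendered label. A tiny demonstration, using `"+"` as a hypothetical stand-in for `self.tags[0]`:

```python
tag = "+"  # hypothetical stand-in for self.tags[0]

# Whitespace around the expression inside {} is ignored by f-strings.
assert f" { tag}" == f" {tag}" == " +"
```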
12 changes: 6 additions & 6 deletions src/distilabel/utils/mkdocs/components_gallery.py
@@ -296,9 +296,9 @@ def _generate_steps_pages(self, src_dir: Path, steps: list) -> List[str]:
             docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")
 
             if docstring["icon"]:
-                assert (
-                    docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
-                ), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                assert docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values(), (
+                    f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                )
 
             name = step["name"]
 
@@ -364,9 +364,9 @@ def _generate_tasks_pages(self, src_dir: Path, tasks: list) -> List[str]:
             first_category = docstring["categories"][0]
             docstring["icon"] = _STEPS_CATEGORY_TO_ICON.get(first_category, "")
             if docstring["icon"]:
-                assert (
-                    docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values()
-                ), f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                assert docstring["icon"] in _STEPS_CATEGORY_TO_ICON.values(), (
+                    f"Icon {docstring['icon']} not found in _STEPS_CATEGORY_TO_ICON"
+                )
 
             name = task["name"]
 
42 changes: 21 additions & 21 deletions tests/unit/models/embeddings/test_llamacpp.py
@@ -115,9 +115,9 @@ def test_normalize_embeddings(self, test_inputs):
         for result in results:
             # Check if the embedding is normalized (L2 norm should be close to 1)
             norm = np.linalg.norm(result)
-            assert np.isclose(
-                norm, 1.0, atol=1e-6
-            ), f"Norm is {norm}, expected close to 1.0"
+            assert np.isclose(norm, 1.0, atol=1e-6), (
+                f"Norm is {norm}, expected close to 1.0"
+            )
 
     def test_normalize_embeddings_false(self, test_inputs):
         """
@@ -129,15 +129,15 @@ def test_normalize_embeddings_false(self, test_inputs):
         for result in results:
             # Check if the embedding is not normalized (L2 norm should not be close to 1)
             norm = np.linalg.norm(result)
-            assert not np.isclose(
-                norm, 1.0, atol=1e-6
-            ), f"Norm is {norm}, expected not close to 1.0"
+            assert not np.isclose(norm, 1.0, atol=1e-6), (
+                f"Norm is {norm}, expected not close to 1.0"
+            )
 
         # Additional check: ensure that at least one embedding has a norm significantly different from 1
         norms = [np.linalg.norm(result) for result in results]
-        assert any(
-            not np.isclose(norm, 1.0, atol=0.1) for norm in norms
-        ), "Expected at least one embedding with norm not close to 1.0"
+        assert any(not np.isclose(norm, 1.0, atol=0.1) for norm in norms), (
+            "Expected at least one embedding with norm not close to 1.0"
+        )
 
     def test_encode_batch(self) -> None:
         """
@@ -149,20 +149,20 @@ def test_encode_batch(self) -> None:
         inputs = [f"This is test sentence {i}" for i in range(batch_size)]
         results = self.embeddings.encode(inputs=inputs)
 
-        assert (
-            len(results) == batch_size
-        ), f"Expected {batch_size} results, got {len(results)}"
+        assert len(results) == batch_size, (
+            f"Expected {batch_size} results, got {len(results)}"
+        )
         for result in results:
-            assert (
-                len(result) == 384
-            ), f"Expected embedding dimension 384, got {len(result)}"
+            assert len(result) == 384, (
+                f"Expected embedding dimension 384, got {len(result)}"
+            )
 
         # Test with a large batch to ensure it doesn't cause issues
         large_batch = ["Large batch test" for _ in range(100)]
        large_results = self.embeddings.encode(inputs=large_batch)
-        assert (
-            len(large_results) == 100
-        ), f"Expected 100 results for large batch, got {len(large_results)}"
+        assert len(large_results) == 100, (
+            f"Expected 100 results for large batch, got {len(large_results)}"
+        )
 
     def test_encode_batch_consistency(self) -> None:
         """
@@ -180,6 +180,6 @@ def test_encode_batch_consistency(self) -> None:
         batch_result = self.embeddings.encode([input_text, "Another sentence"])[0]
 
         # Compare the embeddings
-        assert np.allclose(
-            single_result, batch_result, atol=1e-5
-        ), "Embeddings are not consistent between single and batch processing"
+        assert np.allclose(single_result, batch_result, atol=1e-5), (
+            "Embeddings are not consistent between single and batch processing"
+        )
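For readers outside the test suite: the property these tests assert is that a normalized embedding has an L2 norm of roughly 1.0. A self-contained sketch of that check (vector values invented for illustration), written in the same parenthesized-message style the formatter now produces:

```python
import numpy as np

embedding = np.array([3.0, 4.0])  # made-up vector; its L2 norm is 5.0
normalized = embedding / np.linalg.norm(embedding)

norm = np.linalg.norm(normalized)
assert np.isclose(norm, 1.0, atol=1e-6), (
    f"Norm is {norm}, expected close to 1.0"
)
```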