Commit

style
lhoestq committed Nov 28, 2024
1 parent 5888fe0 commit fae39eb
Showing 5 changed files with 46 additions and 31 deletions.
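All five diffs apply the same mechanical change: multi-item `with` statements that used to break lines inside a context manager's call parentheses are rewritten as parenthesized context-manager lists, one manager per line, with a trailing comma. This is the form recent versions of black emit; the syntax was formalized in Python 3.10, though CPython 3.9's PEG parser already accepts it. A minimal sketch of the before/after, with illustrative file names rather than code from this commit:

    # before: the line break falls inside a call's parentheses
    with open("a.txt") as fa, open(
        "b.txt"
    ) as fb:
        ...

    # after: one pair of parentheses wraps the whole manager list
    with (
        open("a.txt") as fa,
        open("b.txt") as fb,
    ):
        ...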
8 changes: 5 additions & 3 deletions src/datasets/load.py
@@ -242,9 +242,11 @@ def __reduce__(self):  # to make dynamically created class pickable, see _Initia
 def get_dataset_builder_class(
     dataset_module: "DatasetModule", dataset_name: Optional[str] = None
 ) -> Type[DatasetBuilder]:
-    with lock_importable_file(
-        dataset_module.importable_file_path
-    ) if dataset_module.importable_file_path else nullcontext():
+    with (
+        lock_importable_file(dataset_module.importable_file_path)
+        if dataset_module.importable_file_path
+        else nullcontext()
+    ):
         builder_cls = import_main_class(dataset_module.module_path)
         if dataset_module.builder_configs_parameters.builder_configs:
             dataset_name = dataset_name or dataset_module.builder_kwargs.get("dataset_name")
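Beyond the reformatting, the hunk above shows a handy pattern: `contextlib.nullcontext()` serves as a no-op manager, so the same `with` block runs whether or not there is a file to lock. A minimal sketch of the idea; `acquire_lock` below is a hypothetical stand-in, not the helper from `datasets`:

    from contextlib import nullcontext
    from filelock import FileLock

    def acquire_lock(path):  # hypothetical stand-in for lock_importable_file
        return FileLock(path + ".lock")

    def do_import(importable_file_path=None):
        # Lock only when a path is given; otherwise enter a do-nothing context.
        with acquire_lock(importable_file_path) if importable_file_path else nullcontext():
            ...  # the real code imports the builder class here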
41 changes: 24 additions & 17 deletions tests/test_arrow_dataset.py
@@ -2717,9 +2717,11 @@ def test_format_vectors(self, in_memory):
         import tensorflow as tf
         import torch

-        with tempfile.TemporaryDirectory() as tmp_dir, self._create_dummy_dataset(
-            in_memory, tmp_dir
-        ) as dset, dset.map(lambda ex, i: {"vec": np.ones(3) * i}, with_indices=True) as dset:
+        with (
+            tempfile.TemporaryDirectory() as tmp_dir,
+            self._create_dummy_dataset(in_memory, tmp_dir) as dset,
+            dset.map(lambda ex, i: {"vec": np.ones(3) * i}, with_indices=True) as dset,
+        ):
             columns = dset.column_names

             self.assertIsNotNone(dset[0])
@@ -2770,9 +2772,11 @@ def test_format_ragged_vectors(self, in_memory):
         import tensorflow as tf
         import torch

-        with tempfile.TemporaryDirectory() as tmp_dir, self._create_dummy_dataset(
-            in_memory, tmp_dir
-        ) as dset, dset.map(lambda ex, i: {"vec": np.ones(3 + i) * i}, with_indices=True) as dset:
+        with (
+            tempfile.TemporaryDirectory() as tmp_dir,
+            self._create_dummy_dataset(in_memory, tmp_dir) as dset,
+            dset.map(lambda ex, i: {"vec": np.ones(3 + i) * i}, with_indices=True) as dset,
+        ):
             columns = dset.column_names

             self.assertIsNotNone(dset[0])
@@ -2830,9 +2834,11 @@ def test_format_nested(self, in_memory):
         import tensorflow as tf
         import torch

-        with tempfile.TemporaryDirectory() as tmp_dir, self._create_dummy_dataset(
-            in_memory, tmp_dir
-        ) as dset, dset.map(lambda ex: {"nested": [{"foo": np.ones(3)}] * len(ex["filename"])}, batched=True) as dset:
+        with (
+            tempfile.TemporaryDirectory() as tmp_dir,
+            self._create_dummy_dataset(in_memory, tmp_dir) as dset,
+            dset.map(lambda ex: {"nested": [{"foo": np.ones(3)}] * len(ex["filename"])}, batched=True) as dset,
+        ):
             self.assertDictEqual(
                 dset.features, Features({"filename": Value("string"), "nested": {"foo": Sequence(Value("float64"))}})
             )
@@ -3224,11 +3230,11 @@ def test_concatenate_mixed_memory_and_disk(self):
         info1 = DatasetInfo(description="Dataset1")
         info2 = DatasetInfo(description="Dataset2")
         with tempfile.TemporaryDirectory() as tmp_dir:
-            with Dataset.from_dict(data1, info=info1).map(
-                cache_file_name=os.path.join(tmp_dir, "d1.arrow")
-            ) as dset1, Dataset.from_dict(data2, info=info2).map(
-                cache_file_name=os.path.join(tmp_dir, "d2.arrow")
-            ) as dset2, Dataset.from_dict(data3) as dset3:
+            with (
+                Dataset.from_dict(data1, info=info1).map(cache_file_name=os.path.join(tmp_dir, "d1.arrow")) as dset1,
+                Dataset.from_dict(data2, info=info2).map(cache_file_name=os.path.join(tmp_dir, "d2.arrow")) as dset2,
+                Dataset.from_dict(data3) as dset3,
+            ):
                 with concatenate_datasets([dset1, dset2, dset3]) as concatenated_dset:
                     self.assertEqual(len(concatenated_dset), len(dset1) + len(dset2) + len(dset3))
                     self.assertListEqual(concatenated_dset["id"], dset1["id"] + dset2["id"] + dset3["id"])
@@ -4130,9 +4136,10 @@ def test_dataset_to_json(dataset, tmp_path):
 )
 def test_pickle_dataset_after_transforming_the_table(in_memory, method_and_params, arrow_file):
     method, args, kwargs = method_and_params
-    with Dataset.from_file(arrow_file, in_memory=in_memory) as dataset, Dataset.from_file(
-        arrow_file, in_memory=in_memory
-    ) as reference_dataset:
+    with (
+        Dataset.from_file(arrow_file, in_memory=in_memory) as dataset,
+        Dataset.from_file(arrow_file, in_memory=in_memory) as reference_dataset,
+    ):
         out = getattr(dataset, method)(*args, **kwargs)
         dataset = out if out is not None else dataset
         pickled_dataset = pickle.dumps(dataset)
7 changes: 4 additions & 3 deletions tests/test_py_utils.py
@@ -116,9 +116,10 @@ class Foo:
     ],
 )
 def test_map_nested_num_proc(iterable_length, num_proc, expected_num_proc):
-    with patch("datasets.utils.py_utils._single_map_nested") as mock_single_map_nested, patch(
-        "datasets.parallel.parallel.Pool"
-    ) as mock_multiprocessing_pool:
+    with (
+        patch("datasets.utils.py_utils._single_map_nested") as mock_single_map_nested,
+        patch("datasets.parallel.parallel.Pool") as mock_multiprocessing_pool,
+    ):
         data_struct = {f"{i}": i for i in range(iterable_length)}
         _ = map_nested(lambda x: x + 10, data_struct, num_proc=num_proc, parallel_min_length=16)
         if expected_num_proc == 1:
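The test hunks stack several `unittest.mock.patch` context managers, and the parenthesized form gives each patch its own line. A small self-contained sketch of the same technique; the patch targets here are standard-library names chosen for illustration:

    import os
    from unittest.mock import patch

    with (
        patch("os.path.exists") as mock_exists,
        patch("os.remove") as mock_remove,
    ):
        mock_exists.return_value = True
        assert os.path.exists("missing.txt")  # answered by the mock
        mock_exists.assert_called_once_with("missing.txt")
        mock_remove.assert_not_called()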
16 changes: 10 additions & 6 deletions tests/test_search.py
@@ -88,9 +88,11 @@ def test_add_elasticsearch_index(self):
         from elasticsearch import Elasticsearch

         dset: Dataset = self._create_dummy_dataset()
-        with patch("elasticsearch.Elasticsearch.search") as mocked_search, patch(
-            "elasticsearch.client.IndicesClient.create"
-        ) as mocked_index_create, patch("elasticsearch.helpers.streaming_bulk") as mocked_bulk:
+        with (
+            patch("elasticsearch.Elasticsearch.search") as mocked_search,
+            patch("elasticsearch.client.IndicesClient.create") as mocked_index_create,
+            patch("elasticsearch.helpers.streaming_bulk") as mocked_bulk,
+        ):
             mocked_index_create.return_value = {"acknowledged": True}
             mocked_bulk.return_value([(True, None)] * 30)
             mocked_search.return_value = {"hits": {"hits": [{"_score": 1, "_id": 29}]}}
@@ -198,9 +200,11 @@ class ElasticSearchIndexTest(TestCase):
     def test_elasticsearch(self):
         from elasticsearch import Elasticsearch

-        with patch("elasticsearch.Elasticsearch.search") as mocked_search, patch(
-            "elasticsearch.client.IndicesClient.create"
-        ) as mocked_index_create, patch("elasticsearch.helpers.streaming_bulk") as mocked_bulk:
+        with (
+            patch("elasticsearch.Elasticsearch.search") as mocked_search,
+            patch("elasticsearch.client.IndicesClient.create") as mocked_index_create,
+            patch("elasticsearch.helpers.streaming_bulk") as mocked_bulk,
+        ):
             es_client = Elasticsearch()
             mocked_index_create.return_value = {"acknowledged": True}
             index = ElasticSearchIndex(es_client=es_client)
5 changes: 3 additions & 2 deletions tests/test_upstream_hub.py
@@ -242,8 +242,9 @@ def test_push_dataset_dict_to_hub_with_multiple_commits(self, temporary_repo):
         with temporary_repo() as ds_name:
             self._api.create_repo(ds_name, token=self._token, repo_type="dataset")
             num_commits_before_push = len(self._api.list_repo_commits(ds_name, repo_type="dataset", token=self._token))
-            with patch("datasets.config.MAX_SHARD_SIZE", "16KB"), patch(
-                "datasets.config.UPLOADS_MAX_NUMBER_PER_COMMIT", 1
+            with (
+                patch("datasets.config.MAX_SHARD_SIZE", "16KB"),
+                patch("datasets.config.UPLOADS_MAX_NUMBER_PER_COMMIT", 1),
             ):
                 local_ds.push_to_hub(ds_name, token=self._token)
                 hub_ds = load_dataset(ds_name, download_mode="force_redownload")
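Note the two-argument form of `patch` in this last hunk: it replaces a module attribute with a given value instead of a `MagicMock`, and restores it on exit. A minimal sketch of the same idea; the `Config` class is a hypothetical stand-in for `datasets.config`:

    from unittest.mock import patch

    class Config:  # hypothetical stand-in for a config module
        MAX_SHARD_SIZE = "500MB"

    with patch.object(Config, "MAX_SHARD_SIZE", "16KB"):
        assert Config.MAX_SHARD_SIZE == "16KB"  # swapped inside the block
    assert Config.MAX_SHARD_SIZE == "500MB"  # restored afterwards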
