diff --git a/src/tagpack/cli.py b/src/tagpack/cli.py index 420d3e5..9f6d352 100644 --- a/src/tagpack/cli.py +++ b/src/tagpack/cli.py @@ -192,7 +192,7 @@ def low_quality_addresses(args): if args.cluster: print("\nSets of tags appearing in several addresses:") s_int = sorted(intersections, key=lambda x: x[1], reverse=True) - for (k, v) in s_int: + for k, v in s_int: if v > 1: print(f"\t{v}: {', '.join(k)}") else: diff --git a/src/tagpack/graphsense.py b/src/tagpack/graphsense.py index d50ffc7..b9dc391 100644 --- a/src/tagpack/graphsense.py +++ b/src/tagpack/graphsense.py @@ -46,7 +46,7 @@ def _execute_query(self, statement, parameters): i = 0 all_results = [] - for (success, result) in results: + for success, result in results: if not success: print("failed" + result) else: @@ -59,7 +59,6 @@ def contains_keyspace_mapping(self, currency: str) -> bool: return currency in self.ks_map def _check_passed_params(self, df: DataFrame, currency: str, req_column: str): - if df.empty: raise Exception(f"Received empty dataframe for currency {currency}") if req_column not in df.columns: diff --git a/src/tagpack/tagpack.py b/src/tagpack/tagpack.py index 9ac84b1..76c1519 100644 --- a/src/tagpack/tagpack.py +++ b/src/tagpack/tagpack.py @@ -88,7 +88,7 @@ def get_uri_for_tagpack(repo_path, tagpack_file, strict_check, no_git): return res, rel_path, default_prefix -def collect_tagpack_files(path, search_actorpacks=False): +def collect_tagpack_files(path, search_actorpacks=False, max_mb=200): """ Collect Tagpack YAML files from the given path. This function returns a dict made of sets. Each key of the dict is the corresponding header path of @@ -144,6 +144,19 @@ def collect_tagpack_files(path, search_actorpacks=False): tagpack_files = {k: v for k, v in tagpack_files.items() if v} + # exclude files that are too large + max_bytes = max_mb * 1048576 + for _, files in tagpack_files.items(): + for f in files.copy(): + if os.stat(f).st_size > max_bytes: + print_warn( + f"{f} is too large and will be not be processed: " + f"{(os.stat(f).st_size / 1048576):.2f} mb, current " + f"max file size is {max_mb} mb. " + "Please split the file to be processed." + ) + files.remove(f) + return tagpack_files @@ -397,7 +410,6 @@ def get_user_choice_cached(hl, hl_context_str, cache): if hl in cache: return cache[hl] else: - candidates = find_actor_candidates(hl) if len(candidates) == 0: choice = None diff --git a/src/tagpack/tagstore.py b/src/tagpack/tagstore.py index 3f0fdae..8eb4409 100644 --- a/src/tagpack/tagstore.py +++ b/src/tagpack/tagstore.py @@ -76,7 +76,6 @@ def insert_taxonomy(self, taxonomy): "(%(label)s,%(taxonomy)s,%(source)s,%(description)s);" ) for c in taxonomy.concepts: - v = { "id": c.id, "label": c.label, @@ -88,7 +87,6 @@ def insert_taxonomy(self, taxonomy): @auto_commit def insert_confidence_scores(self, confidence): - statement = ( "INSERT INTO confidence (id, label, description, level)" " VALUES (%(id)s,%(label)s,%(description)s,%(level)s)" @@ -120,7 +118,6 @@ def create_id(self, prefix, rel_path): def insert_tagpack( self, tagpack, is_public, force_insert, prefix, rel_path, batch=1000 ): - tagpack_id = self.create_id(prefix, rel_path) h = _get_header(tagpack, tagpack_id) diff --git a/src/tagpack/utils.py b/src/tagpack/utils.py index 0e8e50f..0cf5af4 100644 --- a/src/tagpack/utils.py +++ b/src/tagpack/utils.py @@ -93,7 +93,6 @@ def get_github_repo_url(github_url): def open_localfile_with_pkgresource_fallback(path): - if os.path.isfile(path): return open(path, "r") else: diff --git a/tests/test_actorpack_schema.py b/tests/test_actorpack_schema.py index 502b66c..480a31b 100644 --- a/tests/test_actorpack_schema.py +++ b/tests/test_actorpack_schema.py @@ -115,7 +115,7 @@ def test_field_no_taxonomy(schema): def test_check_type(schema): for field, value in field_values.items(): assert schema.check_type(field, value) - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_type(field, 5) msg = f"Field {field} must be of type {field_types[field]}" assert msg in str(e.value) @@ -123,21 +123,21 @@ def test_check_type(schema): def test_check_taxonomies(schema, taxonomies): schema.schema["actor"]["test"] = {"taxonomy": "nonexistent"} - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_taxonomies("test", "invalid", None) assert "No taxonomies loaded" in str(e.value) schema.schema["actor"]["invalidtax"] = {"taxonomy": "nonexistent"} - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_taxonomies("invalidtax", "value", taxonomies) assert "Unknown taxonomy in" in str(e.value) assert schema.check_taxonomies("categories", "exchange", taxonomies) - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_taxonomies("categories", "test", taxonomies) assert "Undefined concept test for categories field" in str(e.value) assert schema.check_taxonomies("jurisdictions", "MX", taxonomies) - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_taxonomies("jurisdictions", "test", taxonomies) assert "Undefined concept test for jurisdictions field" in str(e.value) diff --git a/tests/test_tagpack_schema.py b/tests/test_tagpack_schema.py index 0b0ffe0..cb90100 100644 --- a/tests/test_tagpack_schema.py +++ b/tests/test_tagpack_schema.py @@ -110,33 +110,33 @@ def test_field_no_taxonomy(schema): def test_check_type(schema): assert schema.check_type("title", "some test string") - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_type("title", 5) assert "Field title must be of type text" in str(e.value) assert schema.check_type("lastmod", date.fromisoformat("2021-04-21")) - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_type("lastmod", 5) assert "Field lastmod must be of type datetime" in str(e.value) assert schema.check_type("address", "string") - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_type("address", 0x2342) assert "Field address must be of type text" in str(e.value) assert schema.check_type("tags", [{"a": 1}, {"b": 2}]) - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_type("tags", "56abc") assert "Field tags must be of type list" in str(e.value) def test_check_taxonomies(schema, taxonomies): assert schema.check_taxonomies("category", "exchange", taxonomies) - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_taxonomies("category", "test", taxonomies) assert "Undefined concept test in field category" in str(e.value) schema.schema["tag"]["dummy"] = {"taxonomy": "test"} - with (pytest.raises(ValidationError)) as e: + with pytest.raises(ValidationError) as e: assert schema.check_taxonomies("dummy", "test", taxonomies) assert "Unknown taxonomy test" in str(e.value)