diff --git a/aboutcode/hashid/__init__.py b/aboutcode/hashid/__init__.py index 18c6e4e6d..5ccba8e04 100644 --- a/aboutcode/hashid/__init__.py +++ b/aboutcode/hashid/__init__.py @@ -72,7 +72,7 @@ def get_vcid_yml_file_path(vcid: str): return Path(VULNERABILITY_REPO_NAME) / vulnerability_yml_path(vcid) -# This cuxstom 32 characters alphabet is designed to avoid visually easily confusable characters: +# This custom 32-character alphabet is designed to avoid characters that are easily confused visually: # i and l # 0 and o _base32_alphabet = b"abcdefghjkmnpqrstuvwxyz123456789" @@ -164,7 +164,7 @@ def get_package_vulnerabilities_yml_file_path(purl: Union[PackageURL, str]): # We use a 4-tier system for storing package metadata. # The tiers are as follows: -# 1. Super Large Ecosystem (~5M packages): 2^10 = 1,028 git repositories +# 1. Super Large Ecosystem (~5M packages): 2^10 = 1,024 git repositories # 2. Large Ecosystem (~500K packages): 2^7 = 128 git repositories # 3. Medium Ecosystem (~50K packages): 2^5 = 32 git repositories # 4. Small Ecosystem (~2K packages): 2^0 = 1 git repository @@ -188,6 +188,7 @@ def get_package_vulnerabilities_yml_file_path(purl: Union[PackageURL, str]): "composer": 5, "deb": 5, "docker": 5, + "gem": 5, "generic": 5, "huggingface": 5, "mlflow": 5, @@ -200,7 +201,6 @@ def get_package_vulnerabilities_yml_file_path(purl: Union[PackageURL, str]): "conda": 0, "cpan": 0, "cran": 0, - "gem": 0, "hackage": 0, "hex": 0, "luarocks": 0, diff --git a/aboutcode/hashid/tests/test_hashid.py b/aboutcode/hashid/tests/test_hashid.py new file mode 100644 index 000000000..f83944984 --- /dev/null +++ b/aboutcode/hashid/tests/test_hashid.py @@ -0,0 +1,54 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# Portions Copyright (c) The Python Software Foundation +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 and Python-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
+# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import pytest + +from aboutcode.hashid import package_path_elements + + +@pytest.mark.parametrize( + "purl, purl_hash", + [ + ("pkg:maven/org.apache.commons/io", "4f"), + ("pkg:GOLANG/google.golang.org/genproto@abcdedf#/googleapis/api/annotations/", "4a"), + ("pkg:golang/github.com/nats-io/nats-server/v2/server@v1.2.9", "22"), + ("pkg:bitbucket/birKenfeld/pyGments-main@244fd47e07d1014f0aed9c", "03"), + ("pkg:github/Package-url/purl-Spec@244fd47e07d1004f0aed9c", "095"), + ("pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie", "19"), + ( + "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io", + "10", + ), + ("pkg:gem/jruby-launcher@1.1.2?Platform=java", "1e"), + ( + "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources", + "28", + ), + ( + "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom", + "28", + ), + ("pkg:Maven/net.sf.jacob-project/jacob@1.14.3?type=dll&classifier=x86", "17"), + ("pkg:npm/%40angular/animation@12.3.1", "323"), + ("pkg:Nuget/EnterpriseLibrary.Common@6.0.1304", "63"), + ("pkg:PYPI/Django_package@1.11.1.dev1", "00"), + ("pkg:composer/guzzlehttp/promises@2.0.2", "1d"), + ("pkg:Rpm/fedora/curl@7.50.3-1.fc25?Arch=i386&Distro=fedora-25", "16"), + ("pkg:maven/HTTPClient/HTTPClient@0.3-3", "4d"), + ("pkg:maven/mygroup/myartifact@1.0.0%20Final?mykey=my%20value", "6f"), + ("pkg:npm/@babel/core#/googleapis/api/annotations/", "0dc"), + ("pkg:npm/@babel/core@1.0.2#/googleapis/api/annotations/", "0dc"), + ("pkg:npm/core@1.0.2#/googleapis/api/annotations/", "23b"), + ("pkg:npm/core#/googleapis/api/annotations/", "23b"), + ], +) +def test_purl_hash(purl, purl_hash): + result_hash, *_ = package_path_elements(purl) + assert result_hash == purl_hash diff --git 
a/pyproject-aboutcode.hashid.toml b/pyproject-aboutcode.hashid.toml index 705b6015f..84c494ad8 100644 --- a/pyproject-aboutcode.hashid.toml +++ b/pyproject-aboutcode.hashid.toml @@ -66,6 +66,7 @@ excludes = [ "**/*.bak", "**/.ipynb_checkpoints", "aboutcode/hashid/python.LICENSE", + "aboutcode/hashid/tests/**/*", ] metadata_files = ["apache-2.0.LICENSE", "NOTICE", "aboutcode/hashid/python.LICENSE"]