Skip to content

Commit

Permalink
Add gitlab importer
Browse files Browse the repository at this point in the history
And use NginxVersion instead of SemverVersion in Nginx importer and improver

Signed-off-by: Tushar Goel <[email protected]>
  • Loading branch information
TG1999 committed May 13, 2022
1 parent 75b2920 commit 8cc0045
Show file tree
Hide file tree
Showing 38 changed files with 1,614 additions and 140 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,10 @@ toml==0.10.2
tomli==2.0.1
traitlets==5.1.1
typing_extensions==4.1.1
univers==30.4.0
univers==30.7.0
urllib3==1.26.9
wcwidth==0.2.5
websocket-client==0.59.0
yarl==1.7.2
zipp==3.8.0
fetchcode==0.1.0
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ install_requires =
GitPython>=3.1.17
aiohttp>=3.7.4.post0
requests>=2.25.1
fetchcode>=0.1.0

[options.extras_require]
dev =
Expand Down
96 changes: 87 additions & 9 deletions vulnerabilities/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import urllib3
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import VersionRange

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -204,26 +205,25 @@ def __get__(self, owner_self, owner_cls):
return self.fget(owner_cls)


def get_item(dictionary: dict, *attributes):
    """
    Return the item found by using each of `attributes`, in order, as a key
    into the nested `dictionary`.

    Return None when `dictionary` is empty or None. Log an error and return
    None as soon as the path cannot be followed, either because a key is
    missing or because an intermediate value is not a mapping that can be
    descended into. With no `attributes`, return `dictionary` itself.

    For example:
    >>> get_item({'a': {'b': {'c': 'd'}}}, 'a', 'b', 'c')
    'd'
    >>> assert(get_item({'a': {'b': {'c': 'd'}}}, 'a', 'b', 'e')) == None
    >>> assert(get_item({'a': None}, 'a', 'b')) == None
    """
    # Imported locally so this function does not depend on the module-level
    # import block.
    from collections.abc import Mapping

    if not dictionary:
        return None

    item = dictionary
    for attribute in attributes:
        # Without this guard a None/int intermediate raises TypeError on the
        # membership test, and a str intermediate does a substring match and
        # then fails on item[attribute].
        if not isinstance(item, Mapping):
            logger.error(f"Cannot look up attribute {attribute} in non-mapping {item!r}")
            return None
        if attribute not in item:
            logger.error(f"Missing attribute {attribute} in {item}")
            return None
        item = item[attribute]
    return item


class GitHubTokenError(Exception):
Expand Down Expand Up @@ -267,3 +267,81 @@ def _get_gh_response(gh_token, graphql_query):
endpoint = "https://api.github.com/graphql"
headers = {"Authorization": f"bearer {gh_token}"}
return requests.post(endpoint, headers=headers, json=graphql_query).json()


def get_reference_id(url: str):
    """
    Return the reference id from a URL, that is the text after its last "/".
    Trailing slashes are ignored so that a URL ending in "/" still yields its
    last path segment instead of an empty string.

    For example:
    >>> get_reference_id("https://github.com/advisories/GHSA-c9hw-wf7x-jp9j")
    'GHSA-c9hw-wf7x-jp9j'
    >>> get_reference_id("https://github.com/advisories/GHSA-c9hw-wf7x-jp9j/")
    'GHSA-c9hw-wf7x-jp9j'
    """
    # rpartition returns the whole string as the last element when there is
    # no "/" at all, matching what split("/")[-1] would give.
    _, _, last_url_part = url.rstrip("/").rpartition("/")
    return last_url_part


def extract_references(urls: List[str]):
    """
    Yield a `Reference` for each URL in `urls`.

    Entries that are not strings are logged and skipped. The reference id is
    taken from `get_reference_id(url)`: it is kept as-is when it contains
    "GHSA-" (case-insensitive), upper-cased when `is_cve` accepts it, and
    omitted otherwise so that the `Reference` carries only the URL.

    For example:
    >>> refs = list(extract_references(["https://github.com/advisories/GHSA-c9hw-wf7x-jp9j"]))
    >>> refs[0].url
    'https://github.com/advisories/GHSA-c9hw-wf7x-jp9j'
    >>> refs[0].reference_id
    'GHSA-c9hw-wf7x-jp9j'
    """
    # Imported here rather than at module level to avoid a circular import.
    from vulnerabilities.importer import Reference

    # Treat a missing list of URLs as "no references" instead of crashing.
    for url in urls or ():
        if not isinstance(url, str):
            logger.error(f"extract_references: url is not of type `str`: {url}")
            continue
        reference_id = get_reference_id(url)
        if "GHSA-" in reference_id.upper():
            yield Reference(url=url, reference_id=reference_id)
        elif is_cve(reference_id):
            yield Reference(url=url, reference_id=reference_id.upper())
        else:
            yield Reference(url=url)


def resolve_version_range(
    affected_version_range: VersionRange,
    package_versions: List[str],
    ignorable_versions: List[str],
) -> Tuple[List[str], List[str]]:
    """
    Given an affected version range and a list of `package_versions`, resolve
    which versions are in this range and return a tuple of two lists of
    `affected_versions` and `unaffected_versions`.

    Versions listed in `ignorable_versions` are skipped; the comparison is
    made on the raw string, before any normalization. Remaining versions have
    spaces and any leading "v"/"V" characters removed, and it is this
    normalized string that is returned. Versions that cannot be parsed with
    the range's `version_class`, or that cannot be tested against the range,
    are logged and left out of both lists.

    Return two empty lists when `affected_version_range` is falsy. A None
    `package_versions` or `ignorable_versions` is treated as empty.
    """
    if not affected_version_range:
        logger.error(f"affected version range is {affected_version_range!r}")
        return [], []

    # Build the lookup once: constant-time membership per version, and a None
    # argument no longer raises TypeError on the `in` test below.
    ignorable = frozenset(ignorable_versions or ())

    affected_versions = []
    unaffected_versions = []
    for package_version in package_versions or []:
        if package_version in ignorable:
            continue
        # Remove whitespace and leading 'v'
        package_version = package_version.replace(" ", "").lstrip("vV")
        try:
            version = affected_version_range.version_class(package_version)
        except Exception:
            logger.error(f"Could not parse version {package_version!r}")
            continue
        # Only the membership test can raise; keep the try body minimal.
        try:
            is_affected = version in affected_version_range
        except Exception:
            logger.error(
                f"Invalid version range constraints {affected_version_range.constraints!r}"
            )
            continue
        if is_affected:
            affected_versions.append(package_version)
        else:
            unaffected_versions.append(package_version)
    return affected_versions, unaffected_versions
2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
from vulnerabilities.importers import alpine_linux
from vulnerabilities.importers import github
from vulnerabilities.importers import gitlab
from vulnerabilities.importers import nginx
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
Expand All @@ -31,6 +32,7 @@
github.GitHubAPIImporter,
nvd.NVDImporter,
openssl.OpensslImporter,
gitlab.GitLabAPIImporter,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
107 changes: 13 additions & 94 deletions vulnerabilities/importers/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@
from vulnerabilities import helpers
from vulnerabilities import severity_systems
from vulnerabilities.helpers import AffectedPackage as LegacyAffectedPackage
from vulnerabilities.helpers import extract_references
from vulnerabilities.helpers import get_item
from vulnerabilities.helpers import get_reference_id
from vulnerabilities.helpers import nearest_patched_package
from vulnerabilities.helpers import resolve_version_range
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
Expand All @@ -48,13 +51,16 @@
from vulnerabilities.improver import Improver
from vulnerabilities.improver import Inference
from vulnerabilities.models import Advisory
from vulnerabilities.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE
from vulnerabilities.package_managers import ComposerVersionAPI
from vulnerabilities.package_managers import GoproxyVersionAPI
from vulnerabilities.package_managers import MavenVersionAPI
from vulnerabilities.package_managers import NpmVersionAPI
from vulnerabilities.package_managers import NugetVersionAPI
from vulnerabilities.package_managers import PypiVersionAPI
from vulnerabilities.package_managers import RubyVersionAPI
from vulnerabilities.package_managers import VersionAPI
from vulnerabilities.package_managers import get_api_package_name

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -170,17 +176,6 @@
}
"""

# Version API classes used by this module.
VERSION_API_CLASSES = [
    MavenVersionAPI,
    NugetVersionAPI,
    ComposerVersionAPI,
    PypiVersionAPI,
    RubyVersionAPI,
    GoproxyVersionAPI,
]

# Lookup table from each class's `package_type` attribute to the class itself.
VERSION_API_CLASSES_BY_PACKAGE_TYPE = dict(
    (api_class.package_type, api_class) for api_class in VERSION_API_CLASSES
)


class GitHubAPIImporter(Importer):
spdx_license_expression = "CC-BY-4.0"
Expand All @@ -204,36 +199,12 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
break


def extract_urls(references: List[dict]) -> Iterable[str]:
    """
    Yield the value stored under the "url" key of each entry in `references`.

    Each entry is subscripted with "url", so entries are expected to be
    dict-like records; an entry without that key raises KeyError.
    """
    for ref in references:
        yield ref["url"]


def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]:
Expand Down Expand Up @@ -271,26 +242,6 @@ class InvalidVersionRange(Exception):
"""


def get_api_package_name(purl: PackageURL) -> Optional[str]:
    """
    Return the package name expected by the GitHub API given a PackageURL.
    Log an error and return None when the PackageURL type is not one of the
    handled types.

    >>> get_api_package_name(PackageURL(type="maven", namespace="org.apache.commons", name="commons-lang3"))
    'org.apache.commons:commons-lang3'
    >>> get_api_package_name(PackageURL(type="composer", namespace="foo", name="bar"))
    'foo/bar'
    """
    if purl.type == "maven":
        return f"{purl.namespace}:{purl.name}"

    if purl.type == "composer":
        return f"{purl.namespace}/{purl.name}"

    if purl.type in ("nuget", "pypi", "gem", "golang"):
        return purl.name

    logger.error(f"get_api_package_name: Unknown PURL {purl!r}")
    # Explicit so the "no name available" outcome is visible to readers.
    return None


def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]:
"""
Yield `AdvisoryData` by taking `resp` and `ecosystem` as input
Expand Down Expand Up @@ -348,7 +299,8 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]:

references = get_item(advisory, "references") or []
if references:
references: List[Reference] = list(extract_references(references))
urls: List[str] = list(extract_urls(references))
references: List[Reference] = list(extract_references(urls))

summary = get_item(advisory, "summary")
identifiers = get_item(advisory, "identifiers") or []
Expand Down Expand Up @@ -450,6 +402,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
aff_vers, unaff_vers = resolve_version_range(
affected_version_range=affected_version_range,
package_versions=valid_versions,
ignorable_versions=WEIRD_IGNORABLE_VERSIONS,
)
affected_purls = [
PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version)
Expand Down Expand Up @@ -483,37 +436,3 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
affected_purls=affected_packages,
fixed_purl=fixed_package,
)


def resolve_version_range(
    affected_version_range: VersionRange,
    package_versions: List[str],
    ignorable_versions=WEIRD_IGNORABLE_VERSIONS,
) -> Tuple[List[str], List[str]]:
    """
    Given an affected version range and a list of `package_versions`, resolve
    which versions are in this range and return a tuple of two lists of
    `affected_versions` and `unaffected_versions`.

    Versions found in `ignorable_versions` are skipped; the comparison is made
    on the raw string, before normalization. Remaining versions have spaces
    and any leading "v"/"V" characters removed, and the normalized string is
    what gets returned. Versions that cannot be parsed with the range's
    `version_class`, or that cannot be tested against the range, are logged
    and left out of both lists.

    Return two empty lists when `affected_version_range` is falsy.
    """
    if not affected_version_range:
        logger.error(f"affected version range is {affected_version_range!r}")
        return [], []
    affected_versions = []
    unaffected_versions = []
    for package_version in package_versions or []:
        if package_version in ignorable_versions:
            continue
        # Remove whitespace
        package_version = package_version.replace(" ", "")
        # Remove leading 'v'
        package_version = package_version.lstrip("vV")
        try:
            version = affected_version_range.version_class(package_version)
        except Exception:
            logger.error(f"Could not parse version {package_version!r}")
            continue
        # The membership test can raise on unusable constraints; skip this
        # version instead of aborting the whole resolution.
        try:
            is_affected = version in affected_version_range
        except Exception:
            logger.error(
                f"Invalid version range constraints {affected_version_range.constraints!r}"
            )
            continue
        if is_affected:
            affected_versions.append(package_version)
        else:
            unaffected_versions.append(package_version)
    return affected_versions, unaffected_versions
Loading

0 comments on commit 8cc0045

Please sign in to comment.