Skip to content

Commit

Permalink
Add gitlab importer
Browse files Browse the repository at this point in the history
And use NginxVersion instead of SemverVersion in Nginx importer and improver

Signed-off-by: Tushar Goel <[email protected]>
  • Loading branch information
TG1999 committed May 17, 2022
1 parent f71776b commit 372a8cc
Show file tree
Hide file tree
Showing 39 changed files with 1,614 additions and 156 deletions.
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,11 @@ toml==0.10.2
tomli==2.0.1
traitlets==5.1.1
typing_extensions==4.1.1
univers==30.5.1
univers==30.7.0
urllib3==1.26.9
wcwidth==0.2.5
websocket-client==0.59.0
yarl==1.7.2
zipp==3.8.0
dateparser==1.1.1
dateparser==1.1.1
fetchcode==0.1.0
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ install_requires =
GitPython>=3.1.17
aiohttp>=3.7.4.post0
requests>=2.25.1
fetchcode>=0.1.0

[options.extras_require]
dev =
Expand Down
69 changes: 60 additions & 9 deletions vulnerabilities/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import urllib3
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import VersionRange

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -205,26 +206,25 @@ def __get__(self, owner_self, owner_cls):
return self.fget(owner_cls)


def get_item(dictionary: dict, *attributes):
    """
    Return the item reached by following `attributes`, in order, as nested
    keys of `dictionary`.

    Return None when `dictionary` is empty or when the path cannot be
    followed, either because a key is missing or because an intermediate
    value cannot be searched for a key (for instance None or a number). An
    error is logged when the path cannot be followed. Called without any
    attribute, return `dictionary` itself.

    For example:
    >>> get_item({'a': {'b': {'c': 'd'}}}, 'a', 'b', 'c')
    'd'
    >>> assert(get_item({'a': {'b': {'c': 'd'}}}, 'a', 'b', 'e')) == None
    """
    if not dictionary:
        return
    item = dictionary
    for attribute in attributes:
        try:
            is_present = attribute in item
        except TypeError:
            # `item` is a leaf value such as None or an int: there is nothing
            # to look up in it, so treat the attribute as missing instead of
            # letting the membership test crash the caller.
            is_present = False
        if not is_present:
            logger.error(f"Missing attribute {attribute} in {item}")
            return None
        item = item[attribute]
    return item


class GitHubTokenError(Exception):
Expand Down Expand Up @@ -292,3 +292,54 @@ def get_affected_packages_by_patched_package(
package.vulnerable_package
)
return affected_packages_by_patched_package


def get_reference_id(url: str):
    """
    Return the reference id of a `url`, that is its last path segment.

    Surrounding whitespace and slashes are ignored, so a trailing "/" or a
    trailing newline does not produce an empty or blank id.

    For example:
    >>> get_reference_id("https://github.com/advisories/GHSA-c9hw-wf7x-jp9j")
    'GHSA-c9hw-wf7x-jp9j'
    >>> get_reference_id("https://github.com/advisories/GHSA-c9hw-wf7x-jp9j/ ")
    'GHSA-c9hw-wf7x-jp9j'
    """
    # Strip whitespace together with slashes: stripping "/" alone stops at a
    # trailing space or newline and would leave the final slash in place.
    cleaned_url = url.strip("/ \t\r\n")
    _head, _separator, ref_id = cleaned_url.rpartition("/")
    return ref_id


def resolve_version_range(
    affected_version_range: VersionRange,
    package_versions: List[str],
    ignorable_versions: List[str],
) -> Tuple[List[str], List[str]]:
    """
    Given an affected version range and a list of `package_versions`, resolve
    which versions are in this range and return a tuple of two lists of
    `affected_versions` and `unaffected_versions`.

    Versions found in `ignorable_versions` are skipped; that check is made on
    the version string as received. The remaining version strings have their
    spaces and leading "v"/"V" characters removed before being parsed with
    the range `version_class`, and are returned in that cleaned-up form.
    Versions that cannot be parsed, or that cannot be tested against the
    range, are logged and left out of both lists.

    Return two empty lists when `affected_version_range` is falsy. Either of
    `package_versions` and `ignorable_versions` may be None or empty.
    """
    if not affected_version_range:
        logger.error(f"affected version range is {affected_version_range!r}")
        return [], []
    # Accept None here the same way `package_versions` accepts it below,
    # rather than failing on the membership test.
    versions_to_ignore = ignorable_versions or []
    affected_versions = []
    unaffected_versions = []
    for package_version in package_versions or []:
        if package_version in versions_to_ignore:
            continue
        # Remove whitespace
        package_version = package_version.replace(" ", "")
        # Remove leading 'v'
        package_version = package_version.lstrip("vV")
        try:
            version = affected_version_range.version_class(package_version)
        except Exception:
            logger.error(f"Could not parse version {package_version!r}")
            continue
        try:
            if version in affected_version_range:
                affected_versions.append(package_version)
            else:
                unaffected_versions.append(package_version)
        except Exception:
            logger.error(
                f"Invalid version range constraints {affected_version_range.constraints!r}"
            )
            continue
    return affected_versions, unaffected_versions
11 changes: 11 additions & 0 deletions vulnerabilities/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@

from vulnerabilities.helpers import classproperty
from vulnerabilities.helpers import evolve_purl
from vulnerabilities.helpers import get_reference_id
from vulnerabilities.helpers import is_cve
from vulnerabilities.helpers import nearest_patched_package
from vulnerabilities.oval_parser import OvalParser
from vulnerabilities.severity_systems import SCORING_SYSTEMS
Expand Down Expand Up @@ -105,6 +107,15 @@ def from_dict(cls, ref: dict):
],
)

@classmethod
def from_url(cls, url):
    """
    Return a new instance for `url`.

    The id extracted from the URL by `get_reference_id` is kept as the
    `reference_id` when it contains "GHSA-" (compared case-insensitively,
    stored as found in the URL) or when `is_cve` accepts it (stored
    uppercased). Otherwise only the `url` is set.
    """
    ref_id = get_reference_id(url)
    uppercased_ref_id = ref_id.upper()
    if "GHSA-" in uppercased_ref_id:
        # The id is stored with the casing found in the URL.
        return cls(reference_id=ref_id, url=url)
    elif is_cve(ref_id):
        return cls(url=url, reference_id=uppercased_ref_id)
    else:
        return cls(url=url)


class UnMergeablePackageError(Exception):
"""
Expand Down
2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from vulnerabilities.importers import alpine_linux
from vulnerabilities.importers import debian
from vulnerabilities.importers import github
from vulnerabilities.importers import gitlab
from vulnerabilities.importers import nginx
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
Expand All @@ -37,6 +38,7 @@
redhat.RedhatImporter,
pysec.PyPIImporter,
debian.DebianImporter,
gitlab.GitLabAPIImporter,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
111 changes: 6 additions & 105 deletions vulnerabilities/importers/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,10 @@
from typing import List
from typing import Mapping
from typing import Optional
from typing import Tuple

from dateutil import parser as dateparser
from django.db.models.query import QuerySet
from packageurl import PackageURL
from univers.version_range import VersionRange
from univers.version_range import build_range_from_github_advisory_constraint

from vulnerabilities import helpers
Expand All @@ -40,6 +38,7 @@
from vulnerabilities.helpers import get_affected_packages_by_patched_package
from vulnerabilities.helpers import get_item
from vulnerabilities.helpers import nearest_patched_package
from vulnerabilities.helpers import resolve_version_range
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
Expand All @@ -49,13 +48,10 @@
from vulnerabilities.improver import Improver
from vulnerabilities.improver import Inference
from vulnerabilities.models import Advisory
from vulnerabilities.package_managers import ComposerVersionAPI
from vulnerabilities.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE
from vulnerabilities.package_managers import GoproxyVersionAPI
from vulnerabilities.package_managers import MavenVersionAPI
from vulnerabilities.package_managers import NugetVersionAPI
from vulnerabilities.package_managers import PypiVersionAPI
from vulnerabilities.package_managers import RubyVersionAPI
from vulnerabilities.package_managers import VersionAPI
from vulnerabilities.package_managers import get_api_package_name

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -171,17 +167,6 @@
}
"""

# Version API classes that are registered by package type just below.
VERSION_API_CLASSES = [
    MavenVersionAPI,
    NugetVersionAPI,
    ComposerVersionAPI,
    PypiVersionAPI,
    RubyVersionAPI,
    GoproxyVersionAPI,
]

# Lookup table from each class `package_type` attribute to the class itself.
VERSION_API_CLASSES_BY_PACKAGE_TYPE = {
    api_class.package_type: api_class for api_class in VERSION_API_CLASSES
}


class GitHubAPIImporter(Importer):
spdx_license_expression = "CC-BY-4.0"
Expand All @@ -205,38 +190,6 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
break


def get_reference_id(url: str):
    """
    Return the reference id from a URL, that is its last non-empty path
    segment. A trailing slash is ignored.

    For example:
    >>> get_reference_id("https://github.com/advisories/GHSA-c9hw-wf7x-jp9j")
    'GHSA-c9hw-wf7x-jp9j'
    >>> get_reference_id("https://github.com/advisories/GHSA-c9hw-wf7x-jp9j/")
    'GHSA-c9hw-wf7x-jp9j'
    """
    # Without stripping, a URL ending with "/" would yield an empty id.
    url_parts = url.strip("/").split("/")
    return url_parts[-1]


def extract_references(reference_data: List[dict]) -> Iterable[Reference]:
    """
    Yield a `Reference` for each mapping of `reference_data` that has a
    string "url".

    The reference also gets a `reference_id` when the id extracted from the
    URL by `get_reference_id` contains "GHSA-" (compared case-insensitively).
    For instance "https://github.com/advisories/GHSA-c9hw-wf7x-jp9j" yields a
    reference with the id "GHSA-c9hw-wf7x-jp9j", while
    "https://github.com/advisories/c9hw-wf7x-jp9j" yields a reference with
    only its URL.

    Entries whose "url" is missing or is not a string are logged and skipped.
    """
    for ref in reference_data:
        # A missing "url" is handled like a non-string one: log and skip.
        url = ref.get("url")
        if not isinstance(url, str):
            logger.error(f"extract_references: url is not of type `str`: {url}")
            continue
        # Check the extracted id rather than the whole URL, so that "GHSA-"
        # appearing elsewhere in the URL does not promote an unrelated last
        # segment to a reference id.
        reference_id = get_reference_id(url)
        if "GHSA-" in reference_id.upper():
            yield Reference(url=url, reference_id=reference_id)
        else:
            yield Reference(url=url)


def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]:
"""
Return a PackageURL by splitting the `github_name` using the `pkg_type` convention.
Expand Down Expand Up @@ -272,26 +225,6 @@ class InvalidVersionRange(Exception):
"""


def get_api_package_name(purl: PackageURL) -> str:
    """
    Return the package name expected by the GitHub API given a PackageURL.

    Maven names join the namespace and name with ":", composer names join
    them with "/", and nuget, pypi, gem and golang names are the bare name.
    For any other type, log an error and return None.

    >>> get_api_package_name(PackageURL(type="maven", namespace="org.apache.commons", name="commons-lang3"))
    'org.apache.commons:commons-lang3'
    >>> get_api_package_name(PackageURL(type="composer", namespace="foo", name="bar"))
    'foo/bar'
    """
    package_type = purl.type

    # Types that join namespace and name, with the separator each one uses.
    separator_by_type = {"maven": ":", "composer": "/"}
    separator = separator_by_type.get(package_type)
    if separator is not None:
        return f"{purl.namespace}{separator}{purl.name}"

    if package_type in ("nuget", "pypi", "gem", "golang"):
        return purl.name

    logger.error(f"get_api_package_name: Unknown PURL {purl!r}")
    return None


def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]:
"""
Yield `AdvisoryData` by taking `resp` and `ecosystem` as input
Expand Down Expand Up @@ -349,7 +282,8 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]:

references = get_item(advisory, "references") or []
if references:
references: List[Reference] = list(extract_references(references))
urls = (ref["url"] for ref in references)
references = [Reference.from_url(u) for u in urls]

summary = get_item(advisory, "summary")
identifiers = get_item(advisory, "identifiers") or []
Expand Down Expand Up @@ -451,6 +385,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
aff_vers, unaff_vers = resolve_version_range(
affected_version_range=affected_version_range,
package_versions=valid_versions,
ignorable_versions=WEIRD_IGNORABLE_VERSIONS,
)
affected_purls = [
PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version)
Expand All @@ -476,37 +411,3 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
affected_purls=affected_packages,
fixed_purl=fixed_package,
)


def resolve_version_range(
    affected_version_range: VersionRange,
    package_versions: List[str],
    ignorable_versions=WEIRD_IGNORABLE_VERSIONS,
) -> Tuple[List[str], List[str]]:
    """
    Given an affected version range and a list of `package_versions`, resolve
    which versions are in this range and return a tuple of two lists of
    `affected_versions` and `unaffected_versions`.

    Versions found in `ignorable_versions` are skipped. The other version
    strings have their spaces and leading "v"/"V" characters removed before
    being parsed with the range `version_class`, and are returned in that
    cleaned-up form. A version that cannot be parsed, or that cannot be
    tested against the range, is logged and left out of both lists.

    Return two empty lists when `affected_version_range` is falsy.
    """
    if not affected_version_range:
        logger.error(f"affected version range is {affected_version_range!r}")
        return [], []
    affected_versions = []
    unaffected_versions = []
    for package_version in package_versions or []:
        if package_version in ignorable_versions:
            continue
        # Remove whitespace
        package_version = package_version.replace(" ", "")
        # Remove leading 'v'
        package_version = package_version.lstrip("vV")
        try:
            version = affected_version_range.version_class(package_version)
        except Exception:
            logger.error(f"Could not parse version {package_version!r}")
            continue
        try:
            is_affected = version in affected_version_range
        except Exception:
            # A range with unusable constraints must not abort the resolution
            # of the remaining versions: log it and move on.
            logger.error(
                f"Invalid version range constraints {affected_version_range.constraints!r}"
            )
            continue
        if is_affected:
            affected_versions.append(package_version)
        else:
            unaffected_versions.append(package_version)
    return affected_versions, unaffected_versions
Loading

0 comments on commit 372a8cc

Please sign in to comment.