diff --git a/.gitignore b/.gitignore
index 545630d64..73b68d215 100644
--- a/.gitignore
+++ b/.gitignore
@@ -125,3 +125,12 @@ Pipfile
# VSCode
.vscode
+
+# Various junk and temp files
+.DS_Store
+*~
+.*.sw[po]
+.build
+.ve
+*.bak
+/.cache/
diff --git a/pytest.ini b/pytest.ini
index d64e30334..7a196de40 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,4 +1,34 @@
[pytest]
DJANGO_SETTINGS_MODULE = vulnerablecode.settings
markers =
- webtest
\ No newline at end of file
+ webtest
+addopts =
+ --doctest-modules
+# Ignore the doctests in the following files until they are migrated to
+# the import-improve structure
+ --ignore=vulnerabilities/importers/alpine_linux.py
+ --ignore=vulnerabilities/importers/apache_httpd.py
+ --ignore=vulnerabilities/importers/apache_kafka.py
+ --ignore=vulnerabilities/importers/apache_tomcat.py
+ --ignore=vulnerabilities/importers/archlinux.py
+ --ignore=vulnerabilities/importers/debian.py
+ --ignore=vulnerabilities/importers/elixir_security.py
+ --ignore=vulnerabilities/importers/gentoo.py
+ --ignore=vulnerabilities/importers/github.py
+ --ignore=vulnerabilities/importers/istio.py
+ --ignore=vulnerabilities/importers/kaybee.py
+ --ignore=vulnerabilities/importers/npm.py
+ --ignore=vulnerabilities/importers/nvd.py
+ --ignore=vulnerabilities/importers/openssl.py
+ --ignore=vulnerabilities/importers/postgresql.py
+ --ignore=vulnerabilities/importers/project_kb_msr2019.py
+ --ignore=vulnerabilities/importers/redhat.py
+ --ignore=vulnerabilities/importers/retiredotnet.py
+ --ignore=vulnerabilities/importers/ruby.py
+ --ignore=vulnerabilities/importers/rust.py
+ --ignore=vulnerabilities/importers/safety_db.py
+ --ignore=vulnerabilities/importers/suse_backports.py
+ --ignore=vulnerabilities/importers/suse_scores.py
+ --ignore=vulnerabilities/importers/ubuntu_usn.py
+ --ignore=vulnerabilities/management/commands/create_cpe_to_purl_map.py
+ --ignore=vulnerabilities/lib_oval.py
diff --git a/requirements.txt b/requirements.txt
index 7d23f3b96..79f88a040 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,12 +8,12 @@ django-widget-tweaks>=1.4.8
packageurl-python>=0.9.4
binaryornot>=0.4.4
GitPython>=3.1.17
-univers>=21.4.16.6
+univers>=30.0.0
saneyaml>=0.5.2
beautifulsoup4>=4.9.3
python-dateutil>=2.8.1
toml>=0.10.2
-lxml>=4.6.3
+lxml>=4.6.4
gunicorn>=20.1.0
django-environ==0.4.5
defusedxml==0.7.1
diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py
new file mode 100644
index 000000000..3d3fdc10d
--- /dev/null
+++ b/vulnerabilities/data_inference.py
@@ -0,0 +1,98 @@
+import dataclasses
+import logging
+from typing import List
+from typing import Optional
+from uuid import uuid4
+
+from packageurl import PackageURL
+from django.db.models.query import QuerySet
+
+from vulnerabilities.data_source import Reference
+from vulnerabilities.data_source import AdvisoryData
+
+logger = logging.getLogger(__name__)
+
+MAX_CONFIDENCE = 100
+
+
+@dataclasses.dataclass(order=True)
+class Inference:
+    """
+    This data class expresses the contract between data improvers and the improve runner.
+
+    Only inferences with the highest confidence for one vulnerability <-> package
+    relationship are to be inserted into the database.
+    """
+
+    vulnerability_id: str = None
+    aliases: List[str] = dataclasses.field(default_factory=list)
+    confidence: int = MAX_CONFIDENCE
+    summary: Optional[str] = None
+    affected_purls: List[PackageURL] = dataclasses.field(default_factory=list)
+    fixed_purl: Optional[PackageURL] = None
+    references: List[Reference] = dataclasses.field(default_factory=list)
+
+    def __post_init__(self):
+        if self.confidence > MAX_CONFIDENCE or self.confidence < 0:
+            raise ValueError(f"confidence must be in 0..{MAX_CONFIDENCE}: {self.confidence!r}")
+
+        assert (
+            self.vulnerability_id
+            or self.aliases
+            or self.summary
+            or self.affected_purls
+            or self.fixed_purl
+            or self.references
+        )
+
+        # fixed_purl is optional, so it is only checked for a version when it
+        # is set; an unset fixed_purl must not count as a version-less purl.
+        purls = self.affected_purls + ([self.fixed_purl] if self.fixed_purl else [])
+        versionless_purls = [purl for purl in purls if not purl.version]
+
+        assert (
+            not versionless_purls
+        ), f"Version-less purls are not supported in an Inference: {versionless_purls}"
+
+    @classmethod
+    def from_advisory_data(cls, advisory_data, confidence, affected_purls, fixed_purl):
+        """
+        Return an Inference object while keeping the same values as of advisory_data
+        for aliases, summary and references
+        """
+        return cls(
+            aliases=advisory_data.aliases,
+            confidence=confidence,
+            summary=advisory_data.summary,
+            affected_purls=affected_purls,
+            fixed_purl=fixed_purl,
+            references=advisory_data.references,
+        )
+
+
+class Improver:
+    """
+    Base class for improvers, which improve the data already imported by a datasource.
+    Subclasses may generate inferences about that data based on multiple factors.
+    """
+
+    @property
+    def interesting_advisories(self) -> QuerySet:
+        """
+        QuerySet of the advisories this improver wants to process; subclasses override
+        """
+        raise NotImplementedError()
+
+    def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]:
+        """
+        Return the Inferences derived from the given advisory data; subclasses override
+        """
+        raise NotImplementedError()
+
+    @classmethod
+    def qualified_name(cls):
+        """
+        Return the dotted "module.QualifiedClassName" path of this improver,
+        used in logging.
+        """
+        return ".".join((cls.__module__, cls.__qualname__))
diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py
index ff48a374a..02d9ff498 100644
--- a/vulnerabilities/data_source.py
+++ b/vulnerabilities/data_source.py
@@ -19,7 +19,6 @@
# for any legal advice.
# VulnerableCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
-
import dataclasses
import logging
import os
@@ -27,8 +26,7 @@
import tempfile
import traceback
import xml.etree.ElementTree as ET
-from binaryornot.helpers import is_binary_string
-from datetime import datetime
+import datetime
from pathlib import Path
from typing import Any
from typing import ContextManager
@@ -37,17 +35,19 @@
from typing import Mapping
from typing import Optional
from typing import Set
+from typing import Iterable
from typing import Tuple
-from git import Repo, DiffIndex
-from packageurl import PackageURL
-from univers.version_specifier import VersionSpecifier
-from univers.versions import version_class_by_package_type
+from binaryornot.helpers import is_binary_string
+from git import DiffIndex
+from git import Repo
+from packageurl import PackageURL
+from univers.version_range import VersionRange
+from univers.versions import Version
+from vulnerabilities.helpers import nearest_patched_package
from vulnerabilities.oval_parser import OvalParser
from vulnerabilities.severity_systems import ScoringSystem
-from vulnerabilities.helpers import is_cve
-from vulnerabilities.helpers import nearest_patched_package
-from vulnerabilities.helpers import AffectedPackage
+from vulnerabilities.severity_systems import SCORING_SYSTEMS
logger = logging.getLogger(__name__)
@@ -57,6 +57,22 @@ class VulnerabilitySeverity:
system: ScoringSystem
value: str
+    def to_dict(self):
+        """
+        Return a plain, serializable dict of this severity; the inverse of
+        from_dict. The scoring system is stored by its identifier.
+        """
+        scoring_system_id = self.system.identifier
+        score = self.value
+        return {"system": scoring_system_id, "value": score}
+
+    @classmethod
+    def from_dict(cls, severity: dict):
+        """Rebuild a VulnerabilitySeverity from a dict produced by to_dict"""
+        scoring_system = SCORING_SYSTEMS[severity["system"]]
+        score = severity["value"]
+        return cls(system=scoring_system, value=score)
+
@dataclasses.dataclass(order=True)
class Reference:
@@ -73,41 +89,129 @@ def normalized(self):
severities = sorted(self.severities)
return Reference(reference_id=self.reference_id, url=self.url, severities=severities)
+    def to_dict(self):
+        """
+        Serialize this reference, nested severities included, to a plain dict
+        that from_dict can convert back.
+        """
+        serialized = {"reference_id": self.reference_id, "url": self.url}
+        # Each severity serializes itself, in the order they are stored.
+        serialized["severities"] = [item.to_dict() for item in self.severities]
+        return serialized
+
+    @classmethod
+    def from_dict(cls, ref: dict):
+        """
+        Rebuild a Reference, nested severities included, from a dict produced
+        by to_dict.
+        """
+        reference_id = ref["reference_id"]
+        url = ref["url"]
+        # Severities were serialized one dict each; revive them in order.
+        severities = list(map(VulnerabilitySeverity.from_dict, ref["severities"]))
+
+        return cls(reference_id=reference_id, url=url, severities=severities)
+
+
+@dataclasses.dataclass(order=True, frozen=True)
+class AffectedPackage:
+    """
+    Contains a range of affected versions and a fixed version of a given package
+    The PackageURL supplied must *not* have a version
+    """
+
+    package: PackageURL
+    affected_version_range: VersionRange
+    fixed_version: Optional[Version] = None
+
+    def __post_init__(self):
+        if self.package.version:
+            raise ValueError(f"AffectedPackage purl must not have a version: {self.package!r}")
+
+    def get_fixed_purl(self):
+        """
+        Return PackageURL for fixed_version; raise ValueError when there is none
+        """
+        if not self.fixed_version:
+            raise ValueError(f"No fixed version for {self.package!r}")
+        return self.package._replace(version=str(self.fixed_version))
+
+    @classmethod
+    def merge(cls, affected_packages: Iterable):
+        """
+        Return a (package, affected_version_ranges, fixed_versions) tuple that
+        combines every AffectedPackage of the given iterable.
+
+        All items must share the same package, else a TypeError is raised.
+        The result is shaped like an AffectedPackage but holds sets, ie
+        package: PackageURL
+        affected_version_range: set(VersionRange)
+        fixed_versions: set(Version)
+        An empty iterable raises KeyError as there is no package to return.
+        """
+        affected_version_ranges = set()
+        fixed_versions = set()
+        purls = set()
+        for pkg in affected_packages:
+            affected_version_ranges.add(pkg.affected_version_range)
+            if pkg.fixed_version:
+                fixed_versions.add(pkg.fixed_version)
+            purls.add(pkg.package)
+        if len(purls) > 1:
+            raise TypeError("Cannot merge with different purls", purls)
+        return purls.pop(), affected_version_ranges, fixed_versions
+
+    def to_dict(self):
+        """
+        Return a serializable dict that can be converted back using self.from_dict
+        """
+        return {
+            "package": self.package.to_dict(),
+            "affected_version_range": str(self.affected_version_range),
+            "fixed_version": str(self.fixed_version) if self.fixed_version else None,
+        }
+
+    @classmethod
+    def from_dict(cls, affected_pkg: dict):
+        """
+        Return an AffectedPackage object from dict generated by self.to_dict
+        """
+        package = PackageURL(**affected_pkg["package"])
+        affected_version_range = VersionRange.from_string(affected_pkg["affected_version_range"])
+        fixed_version = affected_pkg["fixed_version"]
+        if fixed_version:
+            # TODO: revisit after https://github.com/nexB/univers/issues/10
+            fixed_version = affected_version_range.version_class(fixed_version)
+
+        return cls(
+            package=package,
+            affected_version_range=affected_version_range,
+            fixed_version=fixed_version,
+        )
+
@dataclasses.dataclass(order=True)
-class Advisory:
+class AdvisoryData:
"""
This data class expresses the contract between data sources and the import runner.
- Data sources are expected to be usable as context managers and generators, yielding batches of
- Advisory sequences.
- NB: There are two representations for package URLs that are commonly used by code consuming this
- data class; PackageURL objects and strings. As a convention, the former is referred to in
- variable names, etc. as "package_urls" and the latter as "purls".
+ If a vulnerability_id is present then:
+ summary or affected_packages or references must be present
+ otherwise
+ either affected_package or references should be present
+
+ date_published must be aware datetime
"""
- summary: str
- vulnerability_id: Optional[str] = None
+ aliases: List[str] = dataclasses.field(default_factory=list)
+ summary: str = None
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
references: List[Reference] = dataclasses.field(default_factory=list)
+ date_published: Optional[datetime.datetime] = None
def __post_init__(self):
- if self.vulnerability_id and not is_cve(self.vulnerability_id):
- raise ValueError("CVE expected, found: {}".format(self.vulnerability_id))
-
- def normalized(self):
- references = sorted(
- self.references, key=lambda reference: (reference.reference_id, reference.url)
- )
- for index, _ in enumerate(self.references):
- references[index] = references[index].normalized()
-
- return Advisory(
- summary=self.summary,
- vulnerability_id=self.vulnerability_id,
- affected_packages=sorted(self.affected_packages),
- references=references,
- )
+        if self.date_published and not self.date_published.tzinfo:
+            logger.warning(f"AdvisoryData with no tzinfo: {self!r}")  # warn() is deprecated
class InvalidConfigurationError(Exception):
@@ -131,22 +235,18 @@ class DataSource(ContextManager):
def __init__(
self,
- batch_size: int,
- last_run_date: Optional[datetime] = None,
- cutoff_date: Optional[datetime] = None,
+ last_run_date: Optional[datetime.datetime] = None,
+ cutoff_date: Optional[datetime.datetime] = None,
config: Optional[Mapping[str, Any]] = None,
):
"""
Create a DataSource instance.
- :param batch_size: Maximum number of records to return from added_advisories() and
- updated_advisories()
:param last_run_date: Optional timestamp when this data source was last inspected
:param cutoff_date: Optional timestamp, records older than this will be ignored
:param config: Optional dictionary with subclass-specific configuration
"""
config = config or {}
- self.batch_size = batch_size
try:
self.config = self.__class__.CONFIG_CLASS(**config)
# These really should be declared in DataSourceConfiguration above but that would
@@ -165,6 +265,14 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
pass
+    @classmethod
+    def qualified_name(cls):
+        """
+        Return the dotted "module.QualifiedClassName" path of this data source,
+        used in logging.
+        """
+        return ".".join((cls.__module__, cls.__qualname__))
+
@property
def cutoff_timestamp(self) -> int:
"""
@@ -192,25 +300,12 @@ def validate_configuration(self) -> None:
This method is called in the constructor. It should raise InvalidConfigurationError with a
human-readable message.
"""
- pass
- def added_advisories(self) -> Set[Advisory]:
- """
- Subclasses yield batch_size sized batches of Advisory objects that have been added to the
- data source since the last run or self.cutoff_date.
+ def advisory_data(self) -> Iterable[AdvisoryData]:
"""
- return set()
-
- def updated_advisories(self) -> Set[Advisory]:
- """
- Subclasses yield batch_size sized batches of Advisory objects that have been modified since
- the last run or self.cutoff_date.
-
- NOTE: Data sources that do not enable detection of changes to existing records vs added
- records must only implement this method, not added_advisories(). The ImportRunner
- relies on this contract to decide between insert and update operations.
+ Subclasses return AdvisoryData objects
"""
- return set()
+ raise NotImplementedError
def error(self, msg: str) -> None:
"""
@@ -218,20 +313,6 @@ def error(self, msg: str) -> None:
"""
raise InvalidConfigurationError(f"{type(self).__name__}: {msg}")
- def batch_advisories(self, advisories: List[Advisory]) -> Set[Advisory]:
- """
- Yield batches of the passed in list of advisories.
- """
-
- # TODO make this less cryptic and efficient
-
- advisories = advisories[:]
- # copy the list as we are mutating it in the loop below
-
- while advisories:
- b, advisories = advisories[: self.batch_size], advisories[self.batch_size :]
- yield b
-
@dataclasses.dataclass
class GitDataSourceConfiguration(DataSourceConfiguration):
@@ -475,7 +556,7 @@ def _fetch(self) -> Tuple[Mapping, Iterable[ET.ElementTree]]:
# TODO: enforce that we receive the proper data here
raise NotImplementedError
- def updated_advisories(self) -> List[Advisory]:
+ def advisory_data(self) -> List[AdvisoryData]:
for metadata, oval_file in self._fetch():
try:
oval_data = self.get_data_from_xml_doc(oval_file, metadata)
@@ -500,7 +581,7 @@ def set_api(self, all_pkgs: Iterable[str]):
"""
raise NotImplementedError
- def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> List[Advisory]:
+ def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> List[AdvisoryData]:
"""
The orchestration method of the OvalDataSource. This method breaks an
OVAL xml ElementTree into a list of `Advisory`.
diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py
index 95dc2d801..6767f1f3b 100644
--- a/vulnerabilities/helpers.py
+++ b/vulnerabilities/helpers.py
@@ -33,7 +33,6 @@
import toml
import urllib3
from packageurl import PackageURL
-from univers.versions import version_class_by_package_type
# TODO add logging here
diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py
index 5b9a08d91..e31d0ed6d 100644
--- a/vulnerabilities/import_runner.py
+++ b/vulnerabilities/import_runner.py
@@ -23,33 +23,18 @@
import dataclasses
import datetime
+import json
import logging
-from itertools import chain
-from typing import Tuple
+from typing import Set
+from typing import Iterable
-from django.db import transaction
from vulnerabilities import models
-from vulnerabilities.data_source import Advisory, DataSource
-from vulnerabilities.data_source import PackageURL
+from vulnerabilities.models import Advisory
+from vulnerabilities.data_source import AdvisoryData
logger = logging.getLogger(__name__)
-# This *Inserter class is used to instantiate model objects.
-# Frozen dataclass store args required to store instantiate
-# model objects, this way model objects can be hashed indirectly which
-# is required in this implementation.
-
-
-@dataclasses.dataclass(frozen=True)
-class PackageRelatedVulnerabilityInserter:
- vulnerability: models.Vulnerability
- is_vulnerable: bool
- package: models.Package
-
- def to_model_object(self):
- return models.PackageRelatedVulnerability(**dataclasses.asdict(self))
-
class ImportRunner:
"""
@@ -68,9 +53,8 @@ class ImportRunner:
- All update and select operations must use indexed columns.
"""
- def __init__(self, importer: models.Importer, batch_size: int):
+ def __init__(self, importer: models.Importer):
self.importer = importer
- self.batch_size = batch_size
def run(self, cutoff_date: datetime.datetime = None) -> None:
"""
@@ -84,9 +68,11 @@ def run(self, cutoff_date: datetime.datetime = None) -> None:
from all Linux distributions that package this kernel version.
"""
logger.info(f"Starting import for {self.importer.name}.")
- data_source = self.importer.make_data_source(self.batch_size, cutoff_date=cutoff_date)
+ data_source = self.importer.make_data_source(cutoff_date=cutoff_date)
with data_source:
- process_advisories(data_source)
+ advisory_data = data_source.advisory_data()
+ importer_name = data_source.qualified_name()
+ process_advisories(advisory_datas=advisory_data, importer_name=importer_name)
self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc)
self.importer.data_source_cfg = dataclasses.asdict(data_source.config)
self.importer.save()
@@ -107,84 +93,26 @@ def get_vuln_pkg_refs(vulnerability, package):
)
-@transaction.atomic
-def process_advisories(data_source: DataSource) -> None:
- bulk_create_vuln_pkg_refs = set()
- # Treat updated_advisories and added_advisories as same. Eventually
- # we want to refactor all data sources to provide advisories via a
- # single method.
- advisory_batches = chain(data_source.updated_advisories(), data_source.added_advisories())
- for batch in advisory_batches:
- for advisory in batch:
- vuln, vuln_created = _get_or_create_vulnerability(advisory)
- for vuln_ref in advisory.references:
- ref, _ = models.VulnerabilityReference.objects.get_or_create(
- vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url
- )
-
- for score in vuln_ref.severities:
- models.VulnerabilitySeverity.objects.update_or_create(
- vulnerability=vuln,
- scoring_system=score.system.identifier,
- reference=ref,
- defaults={"value": str(score.value)},
- )
-
- for aff_pkg_with_patched_pkg in advisory.affected_packages:
- vulnerable_package, _ = _get_or_create_package(
- aff_pkg_with_patched_pkg.vulnerable_package
- )
- patched_package = None
- if aff_pkg_with_patched_pkg.patched_package:
- patched_package, _ = _get_or_create_package(
- aff_pkg_with_patched_pkg.patched_package
- )
-
- prv, _ = models.PackageRelatedVulnerability.objects.get_or_create(
- vulnerability=vuln,
- package=vulnerable_package,
- )
-
- if patched_package:
- prv.patched_package = patched_package
- prv.save()
-
- models.PackageRelatedVulnerability.objects.bulk_create(
- [i.to_model_object() for i in bulk_create_vuln_pkg_refs]
- )
-
-
-def _get_or_create_vulnerability(
- advisory: Advisory,
-) -> Tuple[models.Vulnerability, bool]:
-
- vuln, created = models.Vulnerability.objects.get_or_create(
- vulnerability_id=advisory.vulnerability_id
- ) # nopep8
- # Eventually we only want to keep summary from NVD and ignore other descriptions.
- if advisory.summary and vuln.summary != advisory.summary:
- vuln.summary = advisory.summary
- vuln.save()
-
- return vuln, created
-
-
-def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
+def process_advisories(advisory_datas: Iterable[AdvisoryData], importer_name: str) -> None:
+ """
+ Insert advisories into the database
+ """
- query_kwargs = {}
- for key, val in p.to_dict().items():
- if not val:
- if key == "qualifiers":
- query_kwargs[key] = {}
- else:
- query_kwargs[key] = ""
+ for data in advisory_datas:
+ obj, created = Advisory.objects.get_or_create(
+ aliases=data.aliases,
+ summary=data.summary,
+ affected_packages=[pkg.to_dict() for pkg in data.affected_packages],
+ references=[ref.to_dict() for ref in data.references],
+ date_published=data.date_published,
+ defaults={
+ "created_by": importer_name,
+ "date_collected": datetime.datetime.now(tz=datetime.timezone.utc),
+ },
+ )
+ if created:
+ logger.info(
+ f"[*] New Advisory with aliases: {obj.aliases!r}, created_by: {obj.created_by}"
+ )
else:
- query_kwargs[key] = val
-
- return models.Package.objects.get_or_create(**query_kwargs)
-
-
-def _package_url_to_package(purl: PackageURL) -> models.Package:
- p = models.Package()
- p.set_package_url(purl)
- return p
+ logger.debug(f"Advisory with aliases: {obj.aliases!r} already exists. Skipped.")
diff --git a/vulnerabilities/importer_yielder.py b/vulnerabilities/importer_yielder.py
index 567755c3c..8fe372646 100644
--- a/vulnerabilities/importer_yielder.py
+++ b/vulnerabilities/importer_yielder.py
@@ -22,218 +22,221 @@
from vulnerabilities.models import Importer
+# TODO: This entire registry needs to go away in favor of a registry similar to
+# improvers.
+# See ./improvers/__init__.py
IMPORTER_REGISTRY = [
{
- "name": "rust",
- "license": "cc0-1.0",
- "last_run": None,
- "data_source": "RustDataSource",
- "data_source_cfg": {
- "branch": None,
- "repository_url": "https://github.com/RustSec/advisory-db",
- },
- },
- {
- "name": "alpine",
+ "name": "nginx",
"license": "",
"last_run": None,
- "data_source": "AlpineDataSource",
- "data_source_cfg": {},
- },
- {
- "name": "archlinux",
- "license": "mit",
- "last_run": None,
- "data_source": "ArchlinuxDataSource",
- "data_source_cfg": {"archlinux_tracker_url": "https://security.archlinux.org/json"},
- },
- {
- "name": "debian",
- "license": "mit",
- "last_run": None,
- "data_source": "DebianDataSource",
- "data_source_cfg": {
- "debian_tracker_url": "https://security-tracker.debian.org/tracker/data/json"
- },
+ "data_source": "NginxDataSource",
+ "data_source_cfg": {"etags": {}},
},
# {
- # "name": "safetydb",
- # "license": "cc-by-nc-4.0",
+ # "name": "rust",
+ # "license": "cc0-1.0",
+ # "last_run": None,
+ # "data_source": "RustDataSource",
+ # "data_source_cfg": {
+ # "branch": None,
+ # "repository_url": "https://github.com/RustSec/advisory-db",
+ # },
+ # },
+ # {
+ # "name": "alpine",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "AlpineDataSource",
+ # "data_source_cfg": {},
+ # },
+ # {
+ # "name": "archlinux",
+ # "license": "mit",
+ # "last_run": None,
+ # "data_source": "ArchlinuxDataSource",
+ # "data_source_cfg": {"archlinux_tracker_url": "https://security.archlinux.org/json"},
+ # },
+ # {
+ # "name": "debian",
+ # "license": "mit",
# "last_run": None,
- # "data_source": "SafetyDbDataSource",
+ # "data_source": "DebianDataSource",
+ # "data_source_cfg": {
+ # "debian_tracker_url": "https://security-tracker.debian.org/tracker/data/json"
+ # },
+ # },
+ # # {
+ # # "name": "safetydb",
+ # # "license": "cc-by-nc-4.0",
+ # # "last_run": None,
+ # # "data_source": "SafetyDbDataSource",
+ # # "data_source_cfg": {
+ # # "url": "https://raw.githubusercontent.com/pyupio/safety-db/master/data/insecure_full.json",
+ # # "etags": {},
+ # # },
+ # # },
+ # {
+ # "name": "npm",
+ # "license": "mit",
+ # "last_run": None,
+ # "data_source": "NpmDataSource",
+ # "data_source_cfg": {"repository_url": "https://github.com/nodejs/security-wg.git"},
+ # },
+ # {
+ # "name": "ruby",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "RubyDataSource",
+ # "data_source_cfg": {"repository_url": "https://github.com/rubysec/ruby-advisory-db.git"},
+ # },
+ # {
+ # "name": "ubuntu",
+ # "license": "gpl-2.0",
+ # "last_run": None,
+ # "data_source": "UbuntuDataSource",
# "data_source_cfg": {
- # "url": "https://raw.githubusercontent.com/pyupio/safety-db/master/data/insecure_full.json",
# "etags": {},
+ # "releases": ["bionic", "trusty", "focal", "eoan", "xenial"],
# },
# },
- {
- "name": "npm",
- "license": "mit",
- "last_run": None,
- "data_source": "NpmDataSource",
- "data_source_cfg": {"repository_url": "https://github.com/nodejs/security-wg.git"},
- },
- {
- "name": "ruby",
- "license": "",
- "last_run": None,
- "data_source": "RubyDataSource",
- "data_source_cfg": {"repository_url": "https://github.com/rubysec/ruby-advisory-db.git"},
- },
- {
- "name": "ubuntu",
- "license": "gpl-2.0",
- "last_run": None,
- "data_source": "UbuntuDataSource",
- "data_source_cfg": {
- "etags": {},
- "releases": ["bionic", "trusty", "focal", "eoan", "xenial"],
- },
- },
- {
- "name": "retiredotnet",
- "license": "mit",
- "last_run": None,
- "data_source": "RetireDotnetDataSource",
- "data_source_cfg": {"repository_url": "https://github.com/RetireNet/Packages.git"},
- },
# {
- # "name": "suse_backports",
+ # "name": "retiredotnet",
+ # "license": "mit",
+ # "last_run": None,
+ # "data_source": "RetireDotnetDataSource",
+ # "data_source_cfg": {"repository_url": "https://github.com/RetireNet/Packages.git"},
+ # },
+ # # {
+ # # "name": "suse_backports",
+ # # "license": "",
+ # # "last_run": None,
+ # # "data_source": "SUSEBackportsDataSource",
+ # # "data_source_cfg": {"url": "http://ftp.suse.com/pub/projects/security/yaml/", "etags": {}},
+ # # },
+ # {
+ # "name": "suse_scores",
# "license": "",
# "last_run": None,
- # "data_source": "SUSEBackportsDataSource",
- # "data_source_cfg": {"url": "http://ftp.suse.com/pub/projects/security/yaml/", "etags": {}},
+ # "data_source": "SUSESeverityScoreDataSource",
+ # "data_source_cfg": {},
+ # },
+ # {
+ # "name": "debian_oval",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "DebianOvalDataSource",
+ # "data_source_cfg": {"etags": {}, "releases": ["wheezy", "stretch", "jessie", "buster"]},
+ # },
+ # {
+ # "name": "redhat",
+ # "license": "cc-by-4.0",
+ # "last_run": None,
+ # "data_source": "RedhatDataSource",
+ # "data_source_cfg": {},
+ # },
+ # {
+ # "name": "nvd",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "NVDDataSource",
+ # "data_source_cfg": {"etags": {}},
+ # },
+ # {
+ # "name": "gentoo",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "GentooDataSource",
+ # "data_source_cfg": {"repository_url": "https://anongit.gentoo.org/git/data/glsa.git"},
+ # },
+ # {
+ # "name": "openssl",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "OpenSSLDataSource",
+ # "data_source_cfg": {"etags": {}},
+ # },
+ # {
+ # "name": "ubuntu_usn",
+ # "license": "gpl-2.0",
+ # "last_run": None,
+ # "data_source": "UbuntuUSNDataSource",
+ # "data_source_cfg": {
+ # "etags": {},
+ # "db_url": "https://usn.ubuntu.com/usn-db/database-all.json.bz2",
+ # },
+ # },
+ # {
+ # "name": "github",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "GitHubAPIDataSource",
+ # "data_source_cfg": {
+ # "endpoint": "https://api.github.com/graphql",
+ # "ecosystems": ["MAVEN", "NUGET", "COMPOSER", "PIP", "RUBYGEMS"],
+ # },
+ # },
+ # {
+ # "name": "msr2019",
+ # "license": "apache-2.0",
+ # "last_run": None,
+ # "data_source": "ProjectKBMSRDataSource",
+ # "data_source_cfg": {"etags": {}},
+ # },
+ # {
+ # "name": "apache_httpd",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "ApacheHTTPDDataSource",
+ # "data_source_cfg": {"etags": {}},
+ # },
+ # {
+ # "name": "kaybee",
+ # "license": "apache-2.0",
+ # "last_run": None,
+ # "data_source": "KaybeeDataSource",
+ # "data_source_cfg": {
+ # "repository_url": "https://github.com/SAP/project-kb.git",
+ # "branch": "vulnerability-data",
+ # },
+ # },
+ # {
+ # "name": "postgresql",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "PostgreSQLDataSource",
+ # "data_source_cfg": {},
+ # },
+ # {
+ # "name": "elixir_security",
+ # "license": "cc0-1.0",
+ # "last_run": None,
+ # "data_source": "ElixirSecurityDataSource",
+ # "data_source_cfg": {
+ # "repository_url": "https://github.com/dependabot/elixir-security-advisories"
+ # },
+ # },
+ # {
+ # "name": "apache_tomcat",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "ApacheTomcatDataSource",
+ # "data_source_cfg": {"etags": {}},
+ # },
+ # {
+ # "name": "apache_kafka",
+ # "license": "",
+ # "last_run": None,
+ # "data_source": "ApacheKafkaDataSource",
+ # "data_source_cfg": {},
+ # },
+ # {
+ # "name": "istio",
+ # "license": "apache-2.0",
+ # "last_run": None,
+ # "data_source": "IstioDataSource",
+ # "data_source_cfg": {"repository_url": "https://github.com/istio/istio.io"},
# },
- {
- "name": "suse_scores",
- "license": "",
- "last_run": None,
- "data_source": "SUSESeverityScoreDataSource",
- "data_source_cfg": {},
- },
- {
- "name": "debian_oval",
- "license": "",
- "last_run": None,
- "data_source": "DebianOvalDataSource",
- "data_source_cfg": {"etags": {}, "releases": ["wheezy", "stretch", "jessie", "buster"]},
- },
- {
- "name": "redhat",
- "license": "cc-by-4.0",
- "last_run": None,
- "data_source": "RedhatDataSource",
- "data_source_cfg": {},
- },
- {
- "name": "nvd",
- "license": "",
- "last_run": None,
- "data_source": "NVDDataSource",
- "data_source_cfg": {"etags": {}},
- },
- {
- "name": "gentoo",
- "license": "",
- "last_run": None,
- "data_source": "GentooDataSource",
- "data_source_cfg": {"repository_url": "https://anongit.gentoo.org/git/data/glsa.git"},
- },
- {
- "name": "openssl",
- "license": "",
- "last_run": None,
- "data_source": "OpenSSLDataSource",
- "data_source_cfg": {"etags": {}},
- },
- {
- "name": "ubuntu_usn",
- "license": "gpl-2.0",
- "last_run": None,
- "data_source": "UbuntuUSNDataSource",
- "data_source_cfg": {
- "etags": {},
- "db_url": "https://usn.ubuntu.com/usn-db/database-all.json.bz2",
- },
- },
- {
- "name": "github",
- "license": "",
- "last_run": None,
- "data_source": "GitHubAPIDataSource",
- "data_source_cfg": {
- "endpoint": "https://api.github.com/graphql",
- "ecosystems": ["MAVEN", "NUGET", "COMPOSER", "PIP", "RUBYGEMS"],
- },
- },
- {
- "name": "msr2019",
- "license": "apache-2.0",
- "last_run": None,
- "data_source": "ProjectKBMSRDataSource",
- "data_source_cfg": {"etags": {}},
- },
- {
- "name": "apache_httpd",
- "license": "",
- "last_run": None,
- "data_source": "ApacheHTTPDDataSource",
- "data_source_cfg": {"etags": {}},
- },
- {
- "name": "kaybee",
- "license": "apache-2.0",
- "last_run": None,
- "data_source": "KaybeeDataSource",
- "data_source_cfg": {
- "repository_url": "https://github.com/SAP/project-kb.git",
- "branch": "vulnerability-data",
- },
- },
- {
- "name": "nginx",
- "license": "",
- "last_run": None,
- "data_source": "NginxDataSource",
- "data_source_cfg": {"etags": {}},
- },
- {
- "name": "postgresql",
- "license": "",
- "last_run": None,
- "data_source": "PostgreSQLDataSource",
- "data_source_cfg": {},
- },
- {
- "name": "elixir_security",
- "license": "cc0-1.0",
- "last_run": None,
- "data_source": "ElixirSecurityDataSource",
- "data_source_cfg": {
- "repository_url": "https://github.com/dependabot/elixir-security-advisories"
- },
- },
- {
- "name": "apache_tomcat",
- "license": "",
- "last_run": None,
- "data_source": "ApacheTomcatDataSource",
- "data_source_cfg": {"etags": {}},
- },
- {
- "name": "apache_kafka",
- "license": "",
- "last_run": None,
- "data_source": "ApacheKafkaDataSource",
- "data_source_cfg": {},
- },
- {
- "name": "istio",
- "license": "apache-2.0",
- "last_run": None,
- "data_source": "IstioDataSource",
- "data_source_cfg": {"repository_url": "https://github.com/istio/istio.io"},
- },
]
diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py
index f7387df61..6d44dab21 100644
--- a/vulnerabilities/importers/__init__.py
+++ b/vulnerabilities/importers/__init__.py
@@ -21,31 +21,32 @@
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
-from vulnerabilities.importers.alpine_linux import AlpineDataSource
-from vulnerabilities.importers.apache_httpd import ApacheHTTPDDataSource
-from vulnerabilities.importers.apache_kafka import ApacheKafkaDataSource
-from vulnerabilities.importers.apache_tomcat import ApacheTomcatDataSource
-from vulnerabilities.importers.archlinux import ArchlinuxDataSource
-from vulnerabilities.importers.debian import DebianDataSource
-from vulnerabilities.importers.debian_oval import DebianOvalDataSource
-from vulnerabilities.importers.elixir_security import ElixirSecurityDataSource
-from vulnerabilities.importers.gentoo import GentooDataSource
-from vulnerabilities.importers.github import GitHubAPIDataSource
-from vulnerabilities.importers.kaybee import KaybeeDataSource
+# from vulnerabilities.importers.alpine_linux import AlpineDataSource
+# from vulnerabilities.importers.apache_httpd import ApacheHTTPDDataSource
+# from vulnerabilities.importers.apache_kafka import ApacheKafkaDataSource
+# from vulnerabilities.importers.apache_tomcat import ApacheTomcatDataSource
+# from vulnerabilities.importers.archlinux import ArchlinuxDataSource
+# from vulnerabilities.importers.debian import DebianDataSource
+# from vulnerabilities.importers.debian_oval import DebianOvalDataSource
+# from vulnerabilities.importers.elixir_security import ElixirSecurityDataSource
+# from vulnerabilities.importers.gentoo import GentooDataSource
+# from vulnerabilities.importers.github import GitHubAPIDataSource
+# from vulnerabilities.importers.kaybee import KaybeeDataSource
from vulnerabilities.importers.nginx import NginxDataSource
-from vulnerabilities.importers.npm import NpmDataSource
-from vulnerabilities.importers.nvd import NVDDataSource
-from vulnerabilities.importers.openssl import OpenSSLDataSource
-from vulnerabilities.importers.postgresql import PostgreSQLDataSource
-from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource
-from vulnerabilities.importers.redhat import RedhatDataSource
-from vulnerabilities.importers.retiredotnet import RetireDotnetDataSource
-from vulnerabilities.importers.ruby import RubyDataSource
-from vulnerabilities.importers.rust import RustDataSource
-from vulnerabilities.importers.safety_db import SafetyDbDataSource
-from vulnerabilities.importers.suse_scores import SUSESeverityScoreDataSource
-from vulnerabilities.importers.ubuntu import UbuntuDataSource
-from vulnerabilities.importers.ubuntu_usn import UbuntuUSNDataSource
-from vulnerabilities.importers.istio import IstioDataSource
+
+# from vulnerabilities.importers.npm import NpmDataSource
+# from vulnerabilities.importers.nvd import NVDDataSource
+# from vulnerabilities.importers.openssl import OpenSSLDataSource
+# from vulnerabilities.importers.postgresql import PostgreSQLDataSource
+# from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource
+# from vulnerabilities.importers.redhat import RedhatDataSource
+# from vulnerabilities.importers.retiredotnet import RetireDotnetDataSource
+# from vulnerabilities.importers.ruby import RubyDataSource
+# from vulnerabilities.importers.rust import RustDataSource
+# from vulnerabilities.importers.safety_db import SafetyDbDataSource
+# from vulnerabilities.importers.suse_scores import SUSESeverityScoreDataSource
+# from vulnerabilities.importers.ubuntu import UbuntuDataSource
+# from vulnerabilities.importers.ubuntu_usn import UbuntuUSNDataSource
+# from vulnerabilities.importers.istio import IstioDataSource
# from vulnerabilities.importers.suse_backports import SUSEBackportsDataSource
diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py
index c5372647d..a24f9c457 100644
--- a/vulnerabilities/importers/nginx.py
+++ b/vulnerabilities/importers/nginx.py
@@ -20,22 +20,34 @@
# VulnerableCode is a free software tool from nexB Inc. and others.
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
-import asyncio
import dataclasses
+import datetime
+from typing import Iterable
+import logging
+import asyncio
import requests
-from packageurl import PackageURL
from bs4 import BeautifulSoup
-from univers.version_specifier import VersionSpecifier
+from packageurl import PackageURL
+from univers.version_range import NginxVersionRange
from univers.versions import SemverVersion
+from django.db.models.query import QuerySet
-from vulnerabilities.data_source import Advisory
+from vulnerabilities.data_source import AdvisoryData
+from vulnerabilities.data_source import AffectedPackage
from vulnerabilities.data_source import DataSource
from vulnerabilities.data_source import DataSourceConfiguration
from vulnerabilities.data_source import Reference
+from vulnerabilities.data_source import VulnerabilitySeverity
+from vulnerabilities.data_inference import Inference
+from vulnerabilities.data_inference import Improver
+from vulnerabilities.helpers import nearest_patched_package
+from vulnerabilities.models import Advisory
from vulnerabilities.package_managers import GitHubTagsAPI
from vulnerabilities.package_managers import Version
-from vulnerabilities.helpers import nearest_patched_package
+from vulnerabilities.severity_systems import SCORING_SYSTEMS
+
+logger = logging.getLogger(__name__)
@dataclasses.dataclass
@@ -46,156 +58,207 @@ class NginxDataSourceConfiguration(DataSourceConfiguration):
class NginxDataSource(DataSource):
CONFIG_CLASS = NginxDataSourceConfiguration
- url = "http://nginx.org/en/security_advisories.html"
-
- def set_api(self):
- self.version_api = GitHubTagsAPI()
- asyncio.run(self.version_api.load_api(["nginx/nginx"]))
-
- # For some reason nginx tags it's releases are in the form of `release-1.2.3`
- # Chop off the `release-` part here.
- normalized_versions = set()
- while self.version_api.cache["nginx/nginx"]:
- version = self.version_api.cache["nginx/nginx"].pop()
- normalized_version = Version(
- version.value.replace("release-", ""), version.release_date
- )
- normalized_versions.add(normalized_version)
- self.version_api.cache["nginx/nginx"] = normalized_versions
+ url = "https://nginx.org/en/security_advisories.html"
- def updated_advisories(self):
- advisories = []
- self.set_api()
+ def advisory_data(self) -> Iterable[AdvisoryData]:
data = requests.get(self.url).content
- advisories.extend(self.to_advisories(data))
- return self.batch_advisories(advisories)
-
- def to_advisories(self, data):
- advisories = []
soup = BeautifulSoup(data, features="lxml")
vuln_list = soup.select("li p")
-
- # Example value of `vuln_list` :
- # ['Excessive CPU usage in HTTP/2 with small window updates',
- #
,
- # 'Severity: medium',
- #
,
- # Advisory, # nopep8
- #
,
- # CVE-2019-9511,
- #
,
- # 'Not vulnerable: 1.17.3+, 1.16.1+',
- #
,
- # 'Vulnerable: 1.9.5-1.17.2']
-
for vuln_info in vuln_list:
- references = []
- for index, child in enumerate(vuln_info.children):
- if index == 0:
- # type of this child is bs4.element.NavigableString.
- # Hence cast it into standard string
- summary = str(child)
- continue
-
- # hasattr(child, "attrs") == False for bs4.element.NavigableString
- if hasattr(child, "attrs") and child.attrs.get("href"):
- link = child.attrs["href"]
- references.append(Reference(url=link))
- if "cve.mitre.org" in link:
- cve_id = child.text
- continue
-
- if "Not vulnerable" in child:
- fixed_packages = self.extract_fixed_pkgs(child)
- continue
-
- if "Vulnerable" in child:
- vulnerable_packages = self.extract_vuln_pkgs(child)
- continue
-
- advisories.append(
- Advisory(
- vulnerability_id=cve_id,
- summary=summary,
- affected_packages=nearest_patched_package(vulnerable_packages, fixed_packages),
+ yield to_advisory_data(**parse_advisory_data_from_paragraph(vuln_info))
+
+
+def to_advisory_data(
+ aliases, summary, advisory_severity, not_vulnerable, vulnerable, references
+) -> AdvisoryData:
+ """
+ Return an AdvisoryData built from the given parameters.
+ An advisory paragraph, without html markup, looks like:
+
+ 1-byte memory overwrite in resolver
+ Severity: medium
+ Advisory
+ CVE-2021-23017
+ Not vulnerable: 1.21.0+, 1.20.1+
+ Vulnerable: 0.6.18-1.20.0
+ The patch pgp
+ """
+
+ qualifiers = {}
+
+ _, _, affected_version_range = vulnerable.partition(":")
+ if "nginx/Windows" in affected_version_range:
+ qualifiers["os"] = "windows"
+ affected_version_range = affected_version_range.replace("nginx/Windows", "")
+ affected_version_range = NginxVersionRange.from_native(affected_version_range)
+
+ affected_packages = []
+ _, _, fixed_versions = not_vulnerable.partition(":")
+ for fixed_version in fixed_versions.split(","):
+ fixed_version = fixed_version.rstrip("+")
+
+ # TODO: Mail nginx about "Not vulnerable: none" entries (create ticket on our side)
+ if "none" in fixed_version:
+ affected_packages.append(
+ AffectedPackage(
+ package=PackageURL(type="generic", name="nginx", qualifiers=qualifiers),
+ affected_version_range=affected_version_range,
)
)
-
- return advisories
-
- def extract_fixed_pkgs(self, vuln_info):
- vuln_status, version_info = vuln_info.split(": ")
- if "none" in version_info:
- return {}
-
- raw_ranges = version_info.split(",")
- version_ranges = []
- for rng in raw_ranges:
- # Eg. "1.7.3+" gets converted to VersionSpecifier.from_scheme_version_spec_string("semver","^1.7.3")
- # The advisory in this case uses `+` in the sense that any version
- # with greater or equal `minor` version satisfies the range.
- # "1.7.4" satisifes "1.7.3+", but "1.8.4" does not. "1.7.3+" has same
- # semantics as that of "^1.7.3"
-
- version_ranges.append(
- VersionSpecifier.from_scheme_version_spec_string("semver", "^" + rng[:-1])
+ break
+
+ fixed_version = SemverVersion(fixed_version)
+ purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers)
+ affected_packages.append(
+ AffectedPackage(
+ package=purl,
+ affected_version_range=affected_version_range,
+ fixed_version=fixed_version,
)
-
- valid_versions = find_valid_versions(
- self.version_api.get("nginx/nginx").valid_versions, version_ranges
)
- return [
- PackageURL(type="generic", name="nginx", version=version) for version in valid_versions
- ]
-
- def extract_vuln_pkgs(self, vuln_info):
- vuln_status, version_infos = vuln_info.split(": ")
- if "none" in version_infos:
- return {}
-
- version_ranges = []
- windows_only = False
- for version_info in version_infos.split(", "):
- if version_info == "all":
- # This is misleading since eventually some version get fixed.
- continue
-
- if "-" not in version_info:
- # These are discrete versions
- version_ranges.append(
- VersionSpecifier.from_scheme_version_spec_string("semver", version_info[0])
- )
- continue
-
- windows_only = "nginx/Windows" in version_info
- version_info = version_info.replace("nginx/Windows", "")
- lower_bound, upper_bound = version_info.split("-")
-
- version_ranges.append(
- VersionSpecifier.from_scheme_version_spec_string(
- "semver", f">={lower_bound},<={upper_bound}"
- )
+ return AdvisoryData(
+ aliases=aliases,
+ summary=summary,
+ affected_packages=affected_packages,
+ references=references,
+ )
+
+
+def parse_advisory_data_from_paragraph(vuln_info):
+ """
+ Return a dict with keys (aliases, summary, advisory_severity,
+ not_vulnerable, vulnerable, references) from bs4 paragraph
+
+ For example:
+ >>> paragraph = '
1-byte memory overwrite in resolver
Severity: medium
Advisory
CVE-2021-23017
Not vulnerable: 1.21.0+, 1.20.1+
Vulnerable: 0.6.18-1.20.0
The patch pgp