Skip to content

Commit

Permalink
Dump importer_yielder in favor of IMPORTER_REGISTRY
Browse files Browse the repository at this point in the history
IMPORTER_REGISTRY is neater and avoids any magic string -> object
conversion. The registry is also more consistent with how improvers are registered.
Fixes: aboutcode-org#501

Signed-off-by: Hritik Vijay <[email protected]>
  • Loading branch information
Hritik14 committed Jan 26, 2022
1 parent 67fc00e commit 90fd963
Show file tree
Hide file tree
Showing 12 changed files with 76 additions and 513 deletions.
6 changes: 0 additions & 6 deletions vulnerabilities/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

from vulnerabilities.models import (
PackageRelatedVulnerability,
Importer,
Package,
Vulnerability,
VulnerabilityReference,
Expand Down Expand Up @@ -55,11 +54,6 @@ class PackageRelatedVulnerabilityAdmin(admin.ModelAdmin):
search_fields = ["vulnerability__vulnerability_id", "package__name"]


@admin.register(Importer)
class ImporterAdmin(admin.ModelAdmin):
pass


@admin.register(VulnerabilitySeverity)
class VulnerabilitySeverityAdmin(admin.ModelAdmin):
pass
1 change: 1 addition & 0 deletions vulnerabilities/data_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]:
raise NotImplementedError

@classmethod
@property
def qualified_name(cls):
"""
Fully qualified name prefixed with the module name of the improver
Expand Down
96 changes: 16 additions & 80 deletions vulnerabilities/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@
import xml.etree.ElementTree as ET
import datetime
from pathlib import Path
from typing import Any
from typing import ContextManager
from typing import Iterable
from typing import List
from typing import Mapping
from typing import Optional
from typing import Set
from typing import Iterable
from typing import Tuple
import warnings

from binaryornot.helpers import is_binary_string
from git import DiffIndex
Expand Down Expand Up @@ -214,104 +212,42 @@ def __post_init__(self):
logger.warn(f"AdvisoryData with no tzinfo: {self!r}")


class InvalidConfigurationError(Exception):
pass


@dataclasses.dataclass
class DataSourceConfiguration:
class NoLicenseWarning(Warning):
pass


class DataSource(ContextManager):
class DataSource:
"""
This class defines how importers consume advisories from a data source.
It makes a distinction between newly added records since the last run and modified records. This
allows the import logic to pick appropriate database operations.
A DataSource collects data from various upstreams and returns corresponding
AdvisoryData objects in its advisory_data method.
Subclass this class to implement an importer
"""

CONFIG_CLASS = DataSourceConfiguration
license = ""

def __init__(
self,
last_run_date: Optional[datetime.datetime] = None,
cutoff_date: Optional[datetime.datetime] = None,
config: Optional[Mapping[str, Any]] = None,
):
"""
Create a DataSource instance.
:param last_run_date: Optional timestamp when this data source was last inspected
:param cutoff_date: Optional timestamp, records older than this will be ignored
:param config: Optional dictionary with subclass-specific configuration
"""
config = config or {}
try:
self.config = self.__class__.CONFIG_CLASS(**config)
# These really should be declared in DataSourceConfiguration above but that would
# prevent DataSource subclasses from declaring mandatory parameters (i.e. positional
# arguments)
setattr(self.config, "last_run_date", last_run_date)
setattr(self.config, "cutoff_date", cutoff_date)
except Exception as e:
raise InvalidConfigurationError(str(e))

self.validate_configuration()

def __enter__(self):
pass

def __exit__(self, exc_type, exc_val, exc_tb):
pass
def __init__(self):
if not self.license:
warnings.warn(f"Running importer {self!r} without a license", NoLicenseWarning)

@classmethod
@property
def qualified_name(cls):
"""
Fully qualified name prefixed with the module name of the data source
used in logging.
"""
return f"{cls.__module__}.{cls.__qualname__}"

@property
def cutoff_timestamp(self) -> int:
"""
:return: An integer Unix timestamp of the last time this data source was queried or the
cutoff date passed in the constructor, whichever is more recent.
"""
if not hasattr(self, "_cutoff_timestamp"):
last_run = 0
if self.config.last_run_date is not None:
last_run = int(self.config.last_run_date.timestamp())

cutoff = 0
if self.config.cutoff_date is not None:
cutoff = int(self.config.cutoff_date.timestamp())

setattr(self, "_cutoff_timestamp", max(last_run, cutoff))

return self._cutoff_timestamp

def validate_configuration(self) -> None:
"""
Subclasses can perform more complex validation than what is handled by data classes and
their type annotations.
This method is called in the constructor. It should raise InvalidConfigurationError with a
human-readable message.
"""

def advisory_data(self) -> Iterable[AdvisoryData]:
"""
Subclasses return AdvisoryData objects
Return AdvisoryData objects corresponding to the data being imported
"""
raise NotImplementedError

def error(self, msg: str) -> None:
"""
Helper method for raising InvalidConfigurationError with the class name in the message.
"""
raise InvalidConfigurationError(f"{type(self).__name__}: {msg}")

# TODO: Adopt the same design as that for DataSource
class DataSourceConfiguration:
pass


@dataclasses.dataclass
Expand Down
51 changes: 10 additions & 41 deletions vulnerabilities/import_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,65 +32,34 @@
from vulnerabilities import models
from vulnerabilities.models import Advisory
from vulnerabilities.data_source import AdvisoryData
from vulnerabilities.data_source import DataSource

logger = logging.getLogger(__name__)


class ImportRunner:
"""
The ImportRunner is responsible for inserting and updating data about vulnerabilities and
affected/unaffected/fixed packages in the database. The two main goals for the implementation
are correctness and efficiency.
affected/unaffected/fixed packages in the database. The main goal for the implementation
is correctness
Correctness:
- There must be no duplicates in the database (should be enforced by the schema).
- No valid data from the data source must be skipped or truncated.
Efficiency:
- Bulk inserts should be used whenever possible.
- Checking whether a record already exists should be kept to a minimum
(the data source should know this instead).
- All update and select operations must use indexed columns.
"""

def __init__(self, importer: models.Importer):
def __init__(self, importer: DataSource):
self.importer = importer

def run(self, cutoff_date: datetime.datetime = None) -> None:
def run(self) -> None:
"""
Create a data source for the given importer and store the data retrieved in the database.
cutoff_date - optional timestamp of the oldest data to include in the import
NB: Data sources provide two kinds of records; vulnerabilities and packages. Vulnerabilities
are potentially shared across many packages, from the same data source and from different
data sources. For example, a vulnerability in the Linux kernel is mentioned by advisories
from all Linux distributions that package this kernel version.
"""
logger.info(f"Starting import for {self.importer.name}.")
data_source = self.importer.make_data_source(cutoff_date=cutoff_date)
with data_source:
advisory_data = data_source.advisory_data()
importer_name = data_source.qualified_name()
process_advisories(advisory_datas=advisory_data, importer_name=importer_name)
self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc)
self.importer.data_source_cfg = dataclasses.asdict(data_source.config)
self.importer.save()

logger.info(f"Finished import for {self.importer.name}.")


def vuln_ref_exists(vulnerability, url, reference_id):
return models.VulnerabilityReference.objects.filter(
vulnerability=vulnerability, reference_id=reference_id, url=url
).exists()


def get_vuln_pkg_refs(vulnerability, package):
return models.PackageRelatedVulnerability.objects.filter(
vulnerability=vulnerability,
package=package,
)
logger.info(f"Starting import for {self.importer.qualified_name}")
advisory_datas = self.importer().advisory_data()
importer_name = self.importer.qualified_name
process_advisories(advisory_datas=advisory_datas, importer_name=importer_name)
logger.info(f"Finished import for {self.importer.qualified_name}.")


def process_advisories(advisory_datas: Iterable[AdvisoryData], importer_name: str) -> None:
Expand Down
Loading

0 comments on commit 90fd963

Please sign in to comment.