Skip to content

Commit

Permalink
Merge pull request #1049 from TG1999/migrate/ubuntu_usn
Browse files Browse the repository at this point in the history
Migrate ubuntu usn importer #1051
  • Loading branch information
TG1999 authored Jan 27, 2023
2 parents 80da375 + d588821 commit 26d45ed
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 99 deletions.
2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from vulnerabilities.importers import retiredotnet
from vulnerabilities.importers import suse_scores
from vulnerabilities.importers import ubuntu
from vulnerabilities.importers import ubuntu_usn
from vulnerabilities.importers import xen

IMPORTERS_REGISTRY = [
Expand Down Expand Up @@ -59,6 +60,7 @@
elixir_security.ElixirSecurityImporter,
apache_tomcat.ApacheTomcatImporter,
xen.XenImporter,
ubuntu_usn.UbuntuUSNImporter,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
44 changes: 42 additions & 2 deletions vulnerabilities/importers/ubuntu.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,48 @@


class UbuntuImporter(OvalImporter):
spdx_license_expression = "GPL"
license_url = "https://ubuntu.com/legal/terms"
spdx_license_expression = "LicenseRef-scancode-other-permissive"
notice = """
From: Seth Arnold <[email protected]>
Date: Wed, Jan 25, 2023 at 2:02 AM
Subject: Re: [ubuntu-hardened] Usage of Ubuntu Security Data in VulnerableCode
To: Tushar Goel <[email protected]>
Cc: <[email protected]>, Philippe Ombredanne <[email protected]>, [email protected] <[email protected]>
On Wed, Jan 11, 2023 at 06:27:38PM +0530, Tushar Goel wrote:
> We would like to integrate the Ubuntu usn data[1][2] and
> Ubuntu security data (OVAL format)[3] in vulnerablecode[4]
> which is a FOSS db of FOSS vulnerability data. We were not
> able to know under which license this security data comes.
> We would be grateful to have your acknowledgement over usage of
> the ubuntu security data in vulnerablecode and have
> some kind of licensing declaration from your side.
Hello Tushar, we do not have an explicit license on this data.
We share our data with the intention that others will use it. Please
feel free to use it for the general furtherance of security.
Much of the data that's contained within our databases is sourced from
third parties, who also shared their data with the intention that others
will use it. I'm not sure what it would look like to try to put a license
on data that is crowd-sourced from thousands of contributors. (If you were
to start such a project today, it'd probably be one of the first things to
formalize. But when CVE was started two decades ago, the primary goal was
sharing knowledge and simplifying the vulnerability remediation process,
and licensing the data was, as far as I can remember, not considered.
Sharing was the goal.)
I will ask that vulnerablecode 'be nice' to our infrastructure that
hosts the databases -- some automated uses of our infrastructure by
vulnerability scanner tools has lead to significant load and engineering
effort. In general, please prefer a small handful of systems updating
mirrors roughly twice a day rather than thousands of hosts pulling
data hourly.
Thanks
"""

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
90 changes: 58 additions & 32 deletions vulnerabilities/importers/ubuntu_usn.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,55 +15,81 @@
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.utils import create_etag
from vulnerabilities.utils import is_cve


class UbuntuUSNImporter(Importer):
def updated_advisories(self):
advisories = []
if create_etag(data_src=self, url=self.config.db_url, etag_key="etag"):
advisories.extend(self.to_advisories(fetch(self.config.db_url)))

return self.batch_advisories(advisories)

def create_etag(self, url):
etag = requests.head(url).headers.get("etag")
if not etag:
return True

elif url in self.config.etags:
if self.config.etags[url] == etag:
return False

self.config.etags[url] = etag
return True
db_url = "https://usn.ubuntu.com/usn-db/database-all.json.bz2"
spdx_license_expression = "LicenseRef-scancode-other-permissive"
notice = """
From: Seth Arnold <[email protected]>
Date: Wed, Jan 25, 2023 at 2:02 AM
Subject: Re: [ubuntu-hardened] Usage of Ubuntu Security Data in VulnerableCode
To: Tushar Goel <[email protected]>
Cc: <[email protected]>, Philippe Ombredanne <[email protected]>, [email protected] <[email protected]>
On Wed, Jan 11, 2023 at 06:27:38PM +0530, Tushar Goel wrote:
> We would like to integrate the Ubuntu usn data[1][2] and
> Ubuntu security data (OVAL format)[3] in vulnerablecode[4]
> which is a FOSS db of FOSS vulnerability data. We were not
> able to know under which license this security data comes.
> We would be grateful to have your acknowledgement over usage of
> the ubuntu security data in vulnerablecode and have
> some kind of licensing declaration from your side.
Hello Tushar, we do not have an explicit license on this data.
We share our data with the intention that others will use it. Please
feel free to use it for the general furtherance of security.
Much of the data that's contained within our databases is sourced from
third parties, who also shared their data with the intention that others
will use it. I'm not sure what it would look like to try to put a license
on data that is crowd-sourced from thousands of contributors. (If you were
to start such a project today, it'd probably be one of the first things to
formalize. But when CVE was started two decades ago, the primary goal was
sharing knowledge and simplifying the vulnerability remediation process,
and licensing the data was, as far as I can remember, not considered.
Sharing was the goal.)
I will ask that vulnerablecode 'be nice' to our infrastructure that
hosts the databases -- some automated uses of our infrastructure by
vulnerability scanner tools has lead to significant load and engineering
effort. In general, please prefer a small handful of systems updating
mirrors roughly twice a day rather than thousands of hosts pulling
data hourly.
Thanks
"""

def advisory_data(self):
    """
    Yield the advisories produced by ``to_advisories`` for the USN
    database obtained by calling ``fetch`` on ``self.db_url``.
    """
    # Nothing is fetched until the generator is first advanced.
    yield from self.to_advisories(usn_db=fetch(self.db_url))

@staticmethod
def to_advisories(usn_db):
    """
    Yield one AdvisoryData per valid CVE listed in each USN entry of ``usn_db``.

    ``usn_db`` is a mapping of USN keys to USN entries. Each entry may carry
    an "id" (used to build the USN reference) and a "cves" list. Every
    advisory has the CVE as its only alias, an empty summary and the
    references built from the USN id (an empty list when the id is missing).
    """
    for usn_data in usn_db.values():
        references = get_usn_references(usn_data.get("id"))
        for cve in usn_data.get("cves", []):
            # The db sometimes contains entries like
            # {'cves': ['python-pgsql vulnerabilities', 'CVE-2006-2313', 'CVE-2006-2314']}
            # This `if` filters entries like 'python-pgsql vulnerabilities'
            if not is_cve(cve):
                continue

            yield AdvisoryData(
                aliases=[cve],
                summary="",
                references=references,
            )


def get_usn_references(usn_id):
    """
    Return a list of references for the USN identified by ``usn_id``.

    The list holds a single Reference whose id is ``USN-<usn_id>`` and whose
    URL points at ``https://usn.ubuntu.com/<usn_id>/``. An empty list is
    returned when ``usn_id`` is missing or empty, so callers can pass the
    result of ``dict.get("id")`` directly.
    """
    # Guard first: a missing id must not reach the string formatting below.
    if not usn_id:
        return []
    return [Reference(reference_id=f"USN-{usn_id}", url=f"https://usn.ubuntu.com/{usn_id}/")]


def fetch(url):
Expand Down
1 change: 0 additions & 1 deletion vulnerabilities/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,5 @@ def no_rmtree(monkeypatch):
"test_rust.py",
"test_suse_backports.py",
"test_suse.py",
"test_ubuntu_usn.py",
"test_upstream.py",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[
{
"aliases": [
"CVE-2009-0698"
],
"summary": "",
"affected_packages": [],
"references": [
{
"reference_id": "USN-763-1",
"url": "https://usn.ubuntu.com/763-1/",
"severities": []
}
],
"date_published": null,
"weaknesses": []
},
{
"aliases": [
"CVE-2009-1274"
],
"summary": "",
"affected_packages": [],
"references": [
{
"reference_id": "USN-763-1",
"url": "https://usn.ubuntu.com/763-1/",
"severities": []
}
],
"date_published": null,
"weaknesses": []
}
]
78 changes: 14 additions & 64 deletions vulnerabilities/tests/test_ubuntu_usn.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,71 +17,21 @@

from packageurl import PackageURL

import vulnerabilities.importers.ubuntu_usn as ubuntu_usn
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Reference
from vulnerabilities.importers.ubuntu_usn import UbuntuUSNImporter
from vulnerabilities.tests import util_tests

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_DATA = os.path.join(BASE_DIR, "test_data/", "ubuntu_usn_db", "database-all.json.bz2")


class TestUbuntuUSNImporter(TestCase):
@classmethod
def setUpClass(cls):
data_src_cfg = {"etags": {}, "db_url": "http://exampledb.com"}
cls.data_src = ubuntu_usn.UbuntuUSNImporter(batch_size=1, config=data_src_cfg)
with open(TEST_DATA, "rb") as f:
cls.raw_data = f.read()
cls.db = json.loads(bz2.decompress(cls.raw_data))

def test_get_usn_references(self):

eg_usn = "435-1"
expected_references = Reference(
reference_id="USN-435-1", url="https://usn.ubuntu.com/435-1/"
)

found_references = ubuntu_usn.get_usn_references(eg_usn)
assert found_references == expected_references

def test_fetch(self):

mock_response = MagicMock()
mock_response.content = self.raw_data
with patch("vulnerabilities.importers.ubuntu_usn.requests.get", return_value=mock_response):
assert ubuntu_usn.fetch("www.db.com") == self.db

def test_to_advisories(self):

expected_advisories = [
Advisory(
summary="",
references=[
Reference(url="https://usn.ubuntu.com/763-1/", reference_id="USN-763-1")
],
vulnerability_id="CVE-2009-0698",
),
Advisory(
summary="",
references=[
Reference(url="https://usn.ubuntu.com/763-1/", reference_id="USN-763-1")
],
vulnerability_id="CVE-2009-1274",
),
]
found_advisories = self.data_src.to_advisories(self.db)

found_advisories = list(map(Advisory.normalized, found_advisories))
expected_advisories = list(map(Advisory.normalized, expected_advisories))
assert sorted(found_advisories) == sorted(expected_advisories)

def test_create_etag(self):
assert self.data_src.config.etags == {}

mock_response = MagicMock()
mock_response.headers = {"etag": "2131151243&2191"}

with patch("vulnerabilities.importers.ubuntu.requests.head", return_value=mock_response):
assert self.data_src.create_etag("https://example.org")
assert self.data_src.config.etags == {"https://example.org": "2131151243&2191"}
assert not self.data_src.create_etag("https://example.org")
TEST_DIR = os.path.join(BASE_DIR, "test_data", "ubuntu_usn_db")


def test_ubuntu_usn():
    """
    Run ``UbuntuUSNImporter.to_advisories`` on the bz2-compressed JSON test
    database and check the serialized advisories against the expected JSON
    file with ``util_tests.check_results_against_json``.
    """
    database = os.path.join(TEST_DIR, "database-all.json.bz2")
    with open(database, "rb") as f:
        raw_data = f.read()
    db = json.loads(bz2.decompress(raw_data))
    advisories = UbuntuUSNImporter().to_advisories(db)
    # Plain string: there is nothing to interpolate in the file name.
    expected_file = os.path.join(TEST_DIR, "ubuntu-usn-expected.json")
    # Iterate the generator directly; no intermediate list is needed.
    result = [data.to_dict() for data in advisories]
    util_tests.check_results_against_json(result, expected_file)

0 comments on commit 26d45ed

Please sign in to comment.