Skip to content

Commit

Permalink
Add CLI for ingestion and build container image (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
fwilhe authored Sep 18, 2024
1 parent 6ece0f1 commit 1c6d1db
Show file tree
Hide file tree
Showing 24 changed files with 1,686 additions and 143 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/container.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Builds the data ingestion container image for amd64 and arm64 with buildah
# and pushes it to ghcr.io.
name: Data ingestion Container
on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Every push to any branch.
  # NOTE(review): each of these runs publishes the `latest` tag; confirm that
  # pushing `latest` from non-default branches is intended.
  push:
permissions:
  # Checkout repository
  contents: read
  # Push container images
  packages: write
jobs:
  data_ingestion:
    name: data_ingestion
    runs-on: ubuntu-24.04
    env:
      IMAGE_NAME: gardenlinux/glvd-data-ingestion
      IMAGE_TAG: latest

    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      # qemu-user-static lets the amd64 runner execute arm64 build steps.
      - name: Install qemu dependency for multi-arch build
        run: |
          sudo apt-get update
          sudo apt-get install -y qemu-user-static

      - name: Build Image
        id: build_image
        uses: redhat-actions/buildah-build@v2
        with:
          image: ${{ env.IMAGE_NAME }}
          tags: ${{ env.IMAGE_TAG }}
          platforms: linux/amd64, linux/arm64
          containerfiles: |
            ./Containerfile

      # Image name and tags are taken from the build step's outputs so both
      # steps always agree on what is pushed.
      - name: Push To ghcr.io
        id: push-to-ghcr
        if: ${{ github.event_name != 'pull_request' }}
        uses: redhat-actions/push-to-registry@v2
        with:
          image: ${{ steps.build_image.outputs.image }}
          tags: ${{ steps.build_image.outputs.tags }}
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
143 changes: 0 additions & 143 deletions .github/workflows/data_ingestion.yml

This file was deleted.

9 changes: 9 additions & 0 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
FROM docker.io/library/debian:trixie-slim

# XXX: Debian unstable required for python3-sqlalchemy (>= 2)
# The sed call appends " unstable" to the matched "Suites:" text of the
# default apt source (\0 is the whole match).
RUN sed -i -e 's/Suites: trixie trixie-updates/\0 unstable/' /etc/apt/sources.list.d/debian.sources

# Python runtime dependencies first (only python3-sqlalchemy is pinned to
# unstable), then the command-line tools.
# NOTE(review): `apt-get upgrade <pkgs>` is used rather than `install`;
# presumably deliberate so the base image is upgraded as well - confirm.
RUN apt-get update && \
    apt-get upgrade -y --no-install-recommends \
        python3-asyncpg \
        python3-pip \
        python3-poetry-core \
        python3-requests \
        python3-sqlalchemy/unstable && \
    apt-get upgrade -y --no-install-recommends \
        git \
        curl \
        debian-archive-keyring \
        postgresql-client

# Editable install of the project; --no-deps because the dependencies were
# installed through apt above.
COPY . /usr/local/src
RUN pip install --break-system-packages --no-deps --editable /usr/local/src
13 changes: 13 additions & 0 deletions gardenlinux-versions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
    "1443": [
        "1443.0",
        "1443.1",
        "1443.2",
        "1443.3",
        "1443.4",
        "1443.5",
        "1443.7",
        "1443.8",
        "1443.9"
    ]
}
32 changes: 32 additions & 0 deletions gardenlinux-versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import requests
import json

# Automatically create a list of minor versions for a given list of major
# versions.
#
# For every major version the patch numbers 0..29 are probed with an HTTP HEAD
# request against the Garden Linux package repository; a 200 response means
# the release exists. Probing of a major version stops early once five
# candidates in a row were missing. The result is printed and written to
# gardenlinux-versions.json.

major_versions = ['1443']

# Maps each major version to the list of minor versions that were found.
found_versions = {m: [] for m in major_versions}

for v in major_versions:
    # Candidates that were missing since the last hit (consecutive misses).
    skipped_versions = []
    for p in range(0, 30):
        candidate_version = f'{v}.{p}'
        print(f'testing {candidate_version}')
        # The timeout keeps a stalled connection from hanging the script
        # forever; requests waits indefinitely when none is given.
        r = requests.head(
            f'https://packages.gardenlinux.io/gardenlinux/dists/{candidate_version}/main/binary-amd64/Packages.gz',
            timeout=30,
        )
        if r.status_code == 200:
            print(f'found {candidate_version}')
            found_versions[v].append(candidate_version)
            # A hit resets the run of consecutive misses.
            skipped_versions = []
        else:
            skipped_versions.append(candidate_version)

        # Releases may have gaps, so only give up after five misses in a row.
        if len(skipped_versions) > 4:
            break

print(found_versions)

with open('gardenlinux-versions.json', 'w', encoding='utf-8') as f:
    json.dump(found_versions, f, ensure_ascii=False, indent=4)
71 changes: 71 additions & 0 deletions ingest-postgres.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/bin/bash
#
# Downloads the Debian and Garden Linux source data and feeds it to the
# `glvd-data` CLI: per-distribution package lists first, then the Debian
# security tracker and NVD data, and finally the combine steps.

set -euo pipefail

readonly GLVD_RAW_URL='https://raw.githubusercontent.com/gardenlinux/glvd-data-ingestion/main'
readonly GL_DISTS_URL='https://packages.gardenlinux.io/gardenlinux/dists'

# Every Garden Linux dist whose Packages index is ingested further down.
# Each entry must be downloaded here, otherwise its ingestion step has no
# input file.
# NOTE(review): 1443.4 is listed in gardenlinux-versions.json but is not
# ingested here - confirm whether that omission is intended.
readonly GL_PACKAGE_VERSIONS=(
    1443.0 1443.1 1443.2 1443.3 1443.5 1443.7 1443.8 1443.9 1443.10
    1592.0 1592.1
    today
)

# Ingest one Garden Linux Packages index.
#   $1  version of the downloaded Packages file (also used in the log line)
#   $2  dist name passed to glvd-data; defaults to $1
ingest_gardenlinux() {
    local packages_version="$1"
    local dist_name="${2:-$1}"
    echo "Run data ingestion (ingest-debsrc - gardenlinux ${packages_version})"
    glvd-data ingest-debsrc gardenlinux "${dist_name}" "./gardenlinux-packages/${packages_version}"
}

mkdir -p data/ingest-debsec/{debian,gardenlinux}/CVE
mkdir -p data/ingest-debsrc/debian
mkdir -p data/ingest-debsrc/var/lib/dpkg
touch data/ingest-debsrc/var/lib/dpkg/status

# --fail makes curl exit non-zero on HTTP errors instead of saving the error
# page as if it were the payload. The URL is quoted because `?` is a glob
# character.
curl --fail 'https://salsa.debian.org/security-tracker-team/security-tracker/-/raw/master/data/CVE/list?ref_type=heads' \
    --output data/ingest-debsec/debian/CVE/list
mkdir -p conf/ingest-debsrc/
curl --fail "${GLVD_RAW_URL}/conf/ingest-debsrc/apt.conf" \
    --output conf/ingest-debsrc/apt.conf
curl --fail "${GLVD_RAW_URL}/conf/ingest-debsrc/debian.sources" \
    --output conf/ingest-debsrc/debian.sources

# Fetch the Debian source lists into a private apt state directory.
APT_CONFIG=conf/ingest-debsrc/apt.conf apt-get -q update \
    -o Dir="$PWD/data/ingest-debsrc/" \
    -o Dir::Etc::sourcelist="$PWD/conf/ingest-debsrc/debian.sources" \
    -o Dir::State="$PWD/data/ingest-debsrc/"
git clone --depth=1 https://salsa.debian.org/security-tracker-team/security-tracker

mkdir -p gardenlinux-packages
for packages_version in "${GL_PACKAGE_VERSIONS[@]}"; do
    curl --fail --silent --show-error \
        "${GL_DISTS_URL}/${packages_version}/main/binary-amd64/Packages.gz" \
        --output "gardenlinux-packages/${packages_version}.gz"
    gunzip "gardenlinux-packages/${packages_version}.gz"
done

echo "Run data ingestion (ingest-debsrc - debian trixie)"
glvd-data ingest-debsrc debian trixie data/ingest-debsrc/lists/deb.debian.org_debian_dists_trixie_main_source_Sources
echo "Run data ingestion (ingest-debsrc - debian bookworm)"
glvd-data ingest-debsrc debian bookworm data/ingest-debsrc/lists/deb.debian.org_debian_dists_bookworm_main_source_Sources
echo "Run data ingestion (ingest-debsec - debian)"
glvd-data ingest-debsec debian security-tracker/data

ingest_gardenlinux today
ingest_gardenlinux 1443.0
ingest_gardenlinux 1443.1
ingest_gardenlinux 1443.2
ingest_gardenlinux 1443.3
ingest_gardenlinux 1443.5
ingest_gardenlinux 1443.7
ingest_gardenlinux 1443.8
ingest_gardenlinux 1443.9
# The newest patch release of each major version is registered under the bare
# major version number.
# NOTE(review): presumably deliberate so the major version resolves to its
# latest patch release - confirm.
ingest_gardenlinux 1443.10 1443
ingest_gardenlinux 1592.0
ingest_gardenlinux 1592.1 1592

echo "Run data ingestion (nvd)"
glvd-data ingest-nvd
echo "Run data combination (combine-deb)"
glvd-data combine-deb
echo "Run data combination (combine-all)"
glvd-data combine-all
46 changes: 46 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Package metadata; the importable code is the `glvd` package under src/.
[tool.poetry]
name = "glvd-data-ingestion"
version = "0.1.0"
description = "Garden Linux Vulnerability Database"
license = "MIT"
authors = []
readme = "README.md"
homepage = "https://gardenlinux.io/"
packages = [
    { include = "glvd", from = "src" },
]

# Console entry point: the `glvd-data` command.
[tool.poetry.scripts]
glvd-data = 'glvd.cli.data.__main__:main'

# Runtime dependencies.
[tool.poetry.dependencies]
python = ">=3.11"
asyncpg = ">=0.28"
requests = ">=2"
SQLAlchemy = "^2"
greenlet = "^3.0.3"

# Test-only dependencies.
[tool.poetry.group.dev.dependencies]
pytest = ">=7"
pytest-asyncio = ">=0.20"
requests-mock = ">=1.9"

[tool.pytest.ini_options]
minversion = "7.0"
addopts = [
    "--import-mode=importlib",
]
asyncio_mode = "auto"

# Modules for which mypy must not complain about missing type information.
[[tool.mypy.overrides]]
module = [
    "apt",
    "requests",
    "requests.adapters",
    "urllib3"
]
ignore_missing_imports = true

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
1 change: 1 addition & 0 deletions src/glvd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# SPDX-License-Identifier: MIT
19 changes: 19 additions & 0 deletions src/glvd/cli/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-License-Identifier: MIT

from __future__ import annotations

from ..registry import CliRegistry


# Registry for the `glvd-data` command line tool, with the two options
# registered directly on it.
# NOTE(review): CliRegistry is defined in ..registry and not visible here;
# add_argument presumably mirrors argparse's method of the same name - confirm.
cli = CliRegistry('glvd-data')

# Connection URL of the target database.
cli.add_argument(
    '--database',
    default='postgresql+asyncpg:///',
    help='the database to use, must use asyncio compatible SQLAlchemy driver',
)

# Flag, off unless given on the command line.
cli.add_argument('--debug', action='store_true', help='enable debug output')
Loading

0 comments on commit 1c6d1db

Please sign in to comment.