Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pipeline to publish scan to federatedcode #1400

Merged
merged 19 commits into from
Nov 12, 2024
Merged
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions scancodeio/settings.py
Original file line number Diff line number Diff line change
@@ -418,3 +418,10 @@
MATCHCODEIO_USER = env.str("MATCHCODEIO_USER", default="")
MATCHCODEIO_PASSWORD = env.str("MATCHCODEIO_PASSWORD", default="")
MATCHCODEIO_API_KEY = env.str("MATCHCODEIO_API_KEY", default="")

# FederatedCode integration
#
# All four values default to an empty string; the ``is_configured`` helper in
# ``scanpipe/pipes/federatedcode.py`` reports any empty one as missing.

# Base URL of the Git account hosting the package repositories. The
# ``<repository name>.git`` part is joined to it to build the clone URL.
FEDERATEDCODE_GIT_ACCOUNT = env.str("FEDERATEDCODE_GIT_ACCOUNT", default="")
# Token inserted into the ``https://`` clone URL to authenticate Git operations.
FEDERATEDCODE_GIT_SERVICE_TOKEN = env.str("FEDERATEDCODE_GIT_SERVICE_TOKEN", default="")
# Written as the Git ``user.name`` of the clone and used in the commit sign-off.
FEDERATEDCODE_GIT_SERVICE_NAME = env.str("FEDERATEDCODE_GIT_SERVICE_NAME", default="")
# Written as the Git ``user.email`` of the clone and used in the commit sign-off.
FEDERATEDCODE_GIT_SERVICE_EMAIL = env.str("FEDERATEDCODE_GIT_SERVICE_EMAIL", default="")
keshav-space marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
@@ -203,6 +203,7 @@ class Meta:
"name",
"url",
"uuid",
"project_purl",
"upload_file",
"upload_file_tag",
"input_urls",
15 changes: 15 additions & 0 deletions scanpipe/forms.py
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@
from django.core.exceptions import ObjectDoesNotExist
from django.core.exceptions import ValidationError

from packageurl import PackageURL
from taggit.forms import TagField
from taggit.forms import TagWidget

@@ -458,12 +459,26 @@ class Meta:
fields = [
"name",
"notes",
"project_purl",
]
widgets = {
"name": forms.TextInput(attrs={"class": "input"}),
"notes": forms.Textarea(attrs={"rows": 3, "class": "textarea is-dynamic"}),
"project_purl": forms.TextInput(attrs={"class": "input"}),
}

def clean_project_purl(self):
    """
    Validate the Project PURL.

    Return the submitted value unchanged when it is empty or when it parses
    as a PackageURL.

    Raise a ``ValidationError`` when the value is not a valid PackageURL.
    """
    project_purl = self.cleaned_data.get("project_purl")

    if project_purl:
        try:
            PackageURL.from_string(project_purl)
        except ValueError as error:
            # Chain the parsing error so the root cause stays in the traceback.
            raise forms.ValidationError(
                "Project PURL must be a valid PackageURL"
            ) from error

    return project_purl

def __init__(self, *args, **kwargs):
"""Load initial values from Project ``settings`` field."""
super().__init__(*args, **kwargs)
18 changes: 18 additions & 0 deletions scanpipe/migrations/0068_project_project_purl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.0.7 on 2024-10-22 14:37

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``project_purl`` character field to the ``project`` model."""

    dependencies = [
        ("scanpipe", "0067_discoveredpackage_notes"),
    ]

    operations = [
        migrations.AddField(
            model_name="project",
            name="project_purl",
            field=models.CharField(
                blank=True,
                help_text="Project Package URL.",
                max_length=2048,
            ),
        ),
    ]
6 changes: 6 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
@@ -561,6 +561,11 @@ class Project(UUIDPKModel, ExtraDataFieldMixin, UpdateMixin, models.Model):
notes = models.TextField(blank=True)
settings = models.JSONField(default=dict, blank=True)
labels = TaggableManager(through=UUIDTaggedItem)
project_purl = models.CharField(
keshav-space marked this conversation as resolved.
Show resolved Hide resolved
max_length=2048,
blank=True,
help_text=_("Project Package URL."),
keshav-space marked this conversation as resolved.
Show resolved Hide resolved
)

objects = ProjectQuerySet.as_manager()

@@ -704,6 +709,7 @@ def clone(
"""Clone this project using the provided ``clone_name`` as new project name."""
new_project = Project.objects.create(
name=clone_name,
project_purl=self.project_purl,
settings=self.settings if copy_settings else {},
)

116 changes: 116 additions & 0 deletions scanpipe/pipelines/publish_to_federatedcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.


from packageurl import PackageURL

from scanpipe.pipelines import Pipeline
from scanpipe.pipes import federatedcode


class PublishToFederatedCode(Pipeline):
    """
    Publish package scan to FederatedCode.

    This pipeline is flagged as an add-on (``is_addon``) and has
    ``download_inputs`` disabled. Its steps validate the project, resolve the
    target Git repository from the Project PURL, clone it, write the scan
    results into the clone, commit and push them, then remove the clone.
    """

    download_inputs = False
    is_addon = True

    @classmethod
    def steps(cls):
        return (
            cls.get_project_purl,
            cls.get_package_repository,
            cls.clone_repository,
            cls.add_scan_result,
            cls.commit_and_push_changes,
            cls.delete_local_clone,
        )

    def get_project_purl(self):
        """
        Validate the publishing prerequisites and get the PURL for the project.

        Store the parsed Project PURL on ``self.project_package_url``.

        Raise an ``Exception`` when an executed pipeline run did not succeed,
        when no input source has a download URL, when the Project PURL is
        missing or has no version, or when the FederatedCode settings are
        incomplete.
        """
        all_executed_pipeline_successful = all(
            run.task_succeeded for run in self.project.runs.executed()
        )

        source_is_download_url = any(
            source.download_url for source in self.project.inputsources.all()
        )

        if not all_executed_pipeline_successful:
            raise Exception("Make sure all the pipelines have completed successfully.")

        if not source_is_download_url:
            raise Exception("Project input should be download_url.")

        if not self.project.project_purl:
            raise Exception("Missing Project PURL.")

        project_package_url = PackageURL.from_string(self.project.project_purl)

        # The version is required: it is a component of the scan file path.
        if not project_package_url.version:
            raise Exception("Missing version in Project PURL.")

        configured, error = federatedcode.is_configured()
        if not configured:
            raise Exception(error)

        self.project_package_url = project_package_url

    def get_package_repository(self):
        """
        Get the Git repository URL and scan path for a given package.

        Store them on ``self.package_git_repo`` and ``self.package_scan_file``.
        """
        self.package_git_repo, self.package_scan_file = (
            federatedcode.get_package_repository(
                project_purl=self.project_package_url, logger=self.log
            )
        )

    def clone_repository(self):
        """Clone the package repository and store it on ``self.repo``."""
        self.repo = federatedcode.clone_repository(
            repo_url=self.package_git_repo,
            logger=self.log,
        )

    def add_scan_result(self):
        """
        Add package scan result to the local Git repository.

        Store the path of the written file, relative to the repository root,
        on ``self.relative_file_path``.
        """
        self.relative_file_path = federatedcode.add_scan_result(
            project=self.project,
            repo=self.repo,
            package_scan_file=self.package_scan_file,
            logger=self.log,
        )

    def commit_and_push_changes(self):
        """Commit and push changes to remote repository, then log the result."""
        federatedcode.commit_and_push_changes(
            repo=self.repo,
            file_to_commit=str(self.relative_file_path),
            purl=str(self.project_package_url),
            logger=self.log,
        )
        self.log(
            f"Scan result for '{self.project_package_url}' "
            f"pushed to '{self.package_git_repo}'"
        )

    def delete_local_clone(self):
        """Remove local clone."""
        federatedcode.delete_local_clone(repo=self.repo)
132 changes: 132 additions & 0 deletions scanpipe/pipes/federatedcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.


import shutil
import tempfile
import textwrap
from pathlib import Path
from urllib.parse import urljoin

from git import Repo

from aboutcode import hashid
from scancodeio import VERSION
from scancodeio import settings
from scanpipe.pipes.output import JSONResultsGenerator


def is_configured():
    """
    Check that the required FederatedCode settings have been set.

    Return a ``(configured, error)`` tuple: ``(True, "")`` when every setting
    has a value, otherwise ``(False, message)`` where ``message`` lists the
    names of the settings that are empty.
    """
    required_settings = (
        "FEDERATEDCODE_GIT_ACCOUNT",
        "FEDERATEDCODE_GIT_SERVICE_TOKEN",
        "FEDERATEDCODE_GIT_SERVICE_NAME",
        "FEDERATEDCODE_GIT_SERVICE_EMAIL",
    )
    missing_vars = [name for name in required_settings if not getattr(settings, name)]

    if not missing_vars:
        return True, ""

    return False, f'Missing environment variables: {", ".join(missing_vars)}'
keshav-space marked this conversation as resolved.
Show resolved Hide resolved


def get_package_repository(project_purl, logger=None):
    """
    Return the Git repository URL and scan path for a given package.

    The repository name is the first component of the package base directory
    computed by ``hashid`` for ``project_purl``. The scan path is that base
    directory extended with the PURL version and ``scancodeio.json``.
    The ``logger`` argument is accepted but not used here.
    """
    # Normalize to exactly one trailing slash before joining.
    account_url = settings.FEDERATEDCODE_GIT_ACCOUNT.rstrip("/") + "/"

    base_dir = hashid.get_package_base_dir(purl=str(project_purl))
    repo_name = base_dir.parts[0]

    scan_path = base_dir / project_purl.version / "scancodeio.json"
    repo_url = urljoin(account_url, f"{repo_name}.git")

    return repo_url, scan_path


def clone_repository(repo_url, logger=None):
    """
    Clone ``repo_url`` into a new temporary directory and return the ``Repo``.

    The service token from the settings is inserted into the ``https://`` URL
    for authentication, and the clone is shallow (``depth=1``). The Git
    ``user.name`` and ``user.email`` of the clone are set from the service
    settings. The ``logger`` argument is accepted but not used here.

    The temporary directory is removed when cloning or configuring fails, and
    the original exception is re-raised.
    """
    local_dir = tempfile.mkdtemp()

    # Only rewrite the leading scheme, never a later "https://" occurrence.
    authenticated_repo_url = repo_url.replace(
        "https://",
        f"https://{settings.FEDERATEDCODE_GIT_SERVICE_TOKEN}@",
        1,
    )

    try:
        repo = Repo.clone_from(
            url=authenticated_repo_url,
            to_path=local_dir,
            depth=1,
        )
        # A single writer sets both values and is released on exit.
        with repo.config_writer(config_level="repository") as config:
            config.set_value("user", "name", settings.FEDERATEDCODE_GIT_SERVICE_NAME)
            config.set_value("user", "email", settings.FEDERATEDCODE_GIT_SERVICE_EMAIL)
    except Exception:
        # Do not leave an orphan temporary directory behind on failure.
        shutil.rmtree(local_dir, ignore_errors=True)
        raise

    return repo


def add_scan_result(project, repo, package_scan_file, logger=None):
    """
    Write the JSON results of ``project`` into the working tree of ``repo``.

    The first component of ``package_scan_file`` is dropped to get the path
    inside the repository; missing parent directories are created. Return
    that repository-relative path. The ``logger`` argument is accepted but
    not used here.
    """
    scan_path_in_repo = Path(*package_scan_file.parts[1:])
    destination = Path(repo.working_dir).joinpath(scan_path_in_repo)
    destination.parent.mkdir(parents=True, exist_ok=True)

    json_chunks = JSONResultsGenerator(project)
    with destination.open(mode="w", encoding="utf-8") as output_file:
        output_file.writelines(json_chunks)

    return scan_path_in_repo


def commit_and_push_changes(
    repo, file_to_commit, purl, remote_name="origin", logger=None
):
    """
    Commit and push changes to remote repository.

    Stage ``file_to_commit``, commit it with a message describing the scan of
    ``purl``, and push the active branch to ``remote_name``. The message says
    "Add" when the file is currently untracked and "Update" otherwise.
    The ``logger`` argument is accepted but not used here.
    """
    author_name = settings.FEDERATEDCODE_GIT_SERVICE_NAME
    author_email = settings.FEDERATEDCODE_GIT_SERVICE_EMAIL

    # Must be computed before staging: the file leaves ``untracked_files``
    # once it is added to the index.
    change_type = "Add" if file_to_commit in repo.untracked_files else "Update"

    # Blank lines separate the subject from the body and the body from the
    # trailer, so that Git does not treat the whole message as the subject.
    commit_message = textwrap.dedent(
        f"""\
        {change_type} scan result for {purl}

        Tool: pkg:github/aboutcode-org/scancode.io@v{VERSION}
        Reference: https://{settings.ALLOWED_HOSTS[0]}/

        Signed-off-by: {author_name} <{author_email}>
        """
    )

    default_branch = repo.active_branch.name

    repo.index.add([file_to_commit])
    repo.index.commit(commit_message)
    # "--no-verify" is passed to ``git push``, which bypasses the pre-push hook.
    repo.git.push(remote_name, default_branch, "--no-verify")


def delete_local_clone(repo):
    """Delete the working directory of ``repo`` along with everything in it."""
    clone_dir = repo.working_dir
    shutil.rmtree(clone_dir)
8 changes: 8 additions & 0 deletions scanpipe/templates/scanpipe/project_settings.html
Original file line number Diff line number Diff line change
@@ -26,6 +26,14 @@
{{ form.name }}
</div>
</div>
<div class="field">
  {# The label must target the PURL input, not the project name input. #}
  <label class="label" for="{{ form.project_purl.id_for_label }}">
    Project PURL
  </label>
  <div class="control">
    {{ form.project_purl }}
  </div>
</div>
<div class="field">
<label class="label" for="{{ form.notes.id_for_label }}">
{{ form.notes.label }}
78 changes: 78 additions & 0 deletions scanpipe/tests/pipes/test_federatedcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

import shutil
import tempfile
from pathlib import Path
from unittest.mock import patch

from django.test import TestCase

import git
from packageurl import PackageURL

from scanpipe import models
from scanpipe.pipes import federatedcode
from scanpipe.tests import make_package


class ScanPipeFederatedCodeTest(TestCase):
    """Tests for the helper functions of ``scanpipe.pipes.federatedcode``."""

    def setUp(self):
        self.project1 = models.Project.objects.create(name="Analysis")

    @patch(
        "scanpipe.pipes.federatedcode.settings.FEDERATEDCODE_GIT_ACCOUNT",
        "https://github.com/test/",
    )
    def test_scanpipe_pipes_federatedcode_get_package_repository(self):
        make_package(
            project=self.project1,
            package_url="pkg:npm/foobar@v1.2.3",
            version="v1.2.3",
        )
        project_purl = PackageURL.from_string("pkg:npm/foobar@v1.2.3")
        expected_git_repo = "https://github.com/test/aboutcode-packages-03f1.git"
        expected_scan_path = "aboutcode-packages-03f1/npm/foobar/v1.2.3/scancodeio.json"
        git_repo, scan_path = federatedcode.get_package_repository(
            project_purl=project_purl
        )

        self.assertEqual(expected_git_repo, git_repo)
        self.assertEqual(expected_scan_path, str(scan_path))

    def test_scanpipe_pipes_federatedcode_add_scan_result(self):
        local_dir = tempfile.mkdtemp()
        # Registered up front so the directory is removed even when an
        # assertion below fails.
        self.addCleanup(shutil.rmtree, local_dir, ignore_errors=True)
        repo = git.Repo.init(local_dir)

        relative_path = federatedcode.add_scan_result(
            self.project1, repo, Path("repo/npm/foobar/v1.2.3/scancodeio.json")
        )

        # The leading "repo" component is dropped from the returned path.
        self.assertEqual(Path("npm/foobar/v1.2.3/scancodeio.json"), relative_path)
        self.assertIn("npm/foobar/v1.2.3/scancodeio.json", repo.untracked_files)

    def test_scanpipe_pipes_federatedcode_delete_local_clone(self):
        local_dir = tempfile.mkdtemp()
        # Safety net in case ``delete_local_clone`` fails to remove it.
        self.addCleanup(shutil.rmtree, local_dir, ignore_errors=True)
        repo = git.Repo.init(local_dir)
        federatedcode.delete_local_clone(repo)

        self.assertFalse(Path(local_dir).exists())
1 change: 1 addition & 0 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
@@ -2007,6 +2007,7 @@ def test_scanpipe_webhook_subscription_model_get_payload(self):
"project": {
"name": "Analysis",
"uuid": str(self.project1.uuid),
"project_purl": "",
"is_archived": False,
"notes": "",
"labels": [],
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -105,6 +105,8 @@ install_requires =
bleach==6.1.0
# Antivirus
clamd==1.0.2
# FederatedCode
aboutcode.hashid==0.1.0

[options.extras_require]
dev =
@@ -146,6 +148,7 @@ scancodeio_pipelines =
map_deploy_to_develop = scanpipe.pipelines.deploy_to_develop:DeployToDevelop
match_to_matchcode = scanpipe.pipelines.match_to_matchcode:MatchToMatchCode
populate_purldb = scanpipe.pipelines.populate_purldb:PopulatePurlDB
publish_to_federatedcode = scanpipe.pipelines.publish_to_federatedcode:PublishToFederatedCode
resolve_dependencies = scanpipe.pipelines.resolve_dependencies:ResolveDependencies
scan_codebase = scanpipe.pipelines.scan_codebase:ScanCodebase
scan_for_virus = scanpipe.pipelines.scan_for_virus:ScanForVirus