Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pipeline to publish scan to federatedcode #1400

Merged
merged 19 commits into from
Nov 12, 2024
Merged
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -18,6 +18,12 @@ v34.9.0 (unreleased)
"policies.yml" files, or global app settings.
https://github.com/aboutcode-org/scancode.io/issues/386

- Add a new ``PublishToFederatedCode`` pipeline (addon) to push scan result
to FederatedCode.
https://github.com/nexB/scancode.io/pull/1400

- Add new ``purl`` field to project model. https://github.com/nexB/scancode.io/pull/1400

v34.8.3 (2024-10-30)
--------------------

Empty file removed aboutcode/__init__.py
Empty file.
22 changes: 22 additions & 0 deletions docs/application-settings.rst
Original file line number Diff line number Diff line change
@@ -338,6 +338,28 @@ API key using ``MATCHCODEIO_API_KEY``::

MATCHCODEIO_API_KEY=insert_your_api_key_here

.. _scancodeio_settings_federatedcode:

FEDERATEDCODE
^^^^^^^^^^^^^

FederatedCode is decentralized and federated metadata for software applications
stored in Git repositories.


To configure your local environment, set the following in your ``.env`` file::

FEDERATEDCODE_GIT_ACCOUNT_URL=https://<Address to your git account>/

FEDERATEDCODE_GIT_SERVICE_TOKEN=insert_your_git_api_key_here

Also provide the name and email that will be used to sign off on commits to Git repositories::

FEDERATEDCODE_GIT_SERVICE_NAME=insert_name_here

FEDERATEDCODE_GIT_SERVICE_EMAIL=insert_email_here


.. _scancodeio_settings_fetch_authentication:

Fetch Authentication
14 changes: 14 additions & 0 deletions docs/built-in-pipelines.rst
Original file line number Diff line number Diff line change
@@ -188,6 +188,20 @@ Populate PurlDB (addon)
:members:
:member-order: bysource

.. _pipeline_publish_to_federatedcode:

Publish To FederatedCode (addon)
--------------------------------

.. warning::
This pipeline requires access to a FederatedCode service.
Refer to :ref:`scancodeio_settings_federatedcode` to configure access to
FederatedCode in your ScanCode.io instance.

.. autoclass:: scanpipe.pipelines.publish_to_federatedcode.PublishToFederatedCode()
:members:
:member-order: bysource

.. _pipeline_scan_codebase:

Scan Codebase
7 changes: 7 additions & 0 deletions scancodeio/settings.py
Original file line number Diff line number Diff line change
@@ -418,3 +418,10 @@
MATCHCODEIO_USER = env.str("MATCHCODEIO_USER", default="")
MATCHCODEIO_PASSWORD = env.str("MATCHCODEIO_PASSWORD", default="")
MATCHCODEIO_API_KEY = env.str("MATCHCODEIO_API_KEY", default="")

# FederatedCode integration

# URL of the Git account used for FederatedCode, and the token used to access it.
FEDERATEDCODE_GIT_ACCOUNT_URL = env.str("FEDERATEDCODE_GIT_ACCOUNT_URL", default="")
FEDERATEDCODE_GIT_SERVICE_TOKEN = env.str("FEDERATEDCODE_GIT_SERVICE_TOKEN", default="")
# Name and email used to sign off on commits to the Git repositories.
FEDERATEDCODE_GIT_SERVICE_NAME = env.str("FEDERATEDCODE_GIT_SERVICE_NAME", default="")
FEDERATEDCODE_GIT_SERVICE_EMAIL = env.str("FEDERATEDCODE_GIT_SERVICE_EMAIL", default="")
1 change: 1 addition & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
@@ -209,6 +209,7 @@ class Meta:
"name",
"url",
"uuid",
"purl",
"upload_file",
"upload_file_tag",
"input_urls",
15 changes: 15 additions & 0 deletions scanpipe/forms.py
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@
from django.core.exceptions import ObjectDoesNotExist
from django.core.exceptions import ValidationError

from packageurl import PackageURL
from taggit.forms import TagField
from taggit.forms import TagWidget

@@ -480,12 +481,26 @@ class Meta:
fields = [
"name",
"notes",
"purl",
]
widgets = {
"name": forms.TextInput(attrs={"class": "input"}),
"notes": forms.Textarea(attrs={"rows": 3, "class": "textarea is-dynamic"}),
"purl": forms.TextInput(attrs={"class": "input"}),
}

def clean_purl(self):
    """
    Validate the Project PURL.

    Return the submitted value unchanged when it is empty or when it parses as
    a Package URL. Raise a ``ValidationError`` when it cannot be parsed.
    """
    purl = self.cleaned_data.get("purl")

    if purl:
        try:
            PackageURL.from_string(purl)
        except ValueError as error:
            # Chain the parser error so its cause stays visible in tracebacks.
            raise forms.ValidationError("PURL must be a valid PackageURL") from error

    return purl
keshav-space marked this conversation as resolved.
Show resolved Hide resolved

def __init__(self, *args, **kwargs):
"""Load initial values from Project ``settings`` field."""
super().__init__(*args, **kwargs)
18 changes: 18 additions & 0 deletions scanpipe/migrations/0069_project_purl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.1.3 on 2024-11-08 12:47

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``purl`` character field to the ``project`` model."""

    dependencies = [
        ("scanpipe", "0068_rename_discovered_dependencies_attribute"),
    ]

    operations = [
        migrations.AddField(
            model_name="project",
            name="purl",
            field=models.CharField(
                blank=True,
                help_text=(
                    "Package URL for the project, used for pushing project scan "
                    "result to FederatedCode. This should be the PURL of the input."
                ),
                max_length=2048,
            ),
        ),
    ]
9 changes: 9 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
@@ -562,6 +562,14 @@ class Project(UUIDPKModel, ExtraDataFieldMixin, UpdateMixin, models.Model):
notes = models.TextField(blank=True)
settings = models.JSONField(default=dict, blank=True)
labels = TaggableManager(through=UUIDTaggedItem)
purl = models.CharField(
max_length=2048,
blank=True,
help_text=_(
"Package URL for the project, used for pushing project scan result to "
"FederatedCode. This should be the PURL of the input."
keshav-space marked this conversation as resolved.
Show resolved Hide resolved
),
)

objects = ProjectQuerySet.as_manager()

@@ -705,6 +713,7 @@ def clone(
"""Clone this project using the provided ``clone_name`` as new project name."""
new_project = Project.objects.create(
name=clone_name,
purl=self.purl,
settings=self.settings if copy_settings else {},
)

97 changes: 97 additions & 0 deletions scanpipe/pipelines/publish_to_federatedcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.


from scanpipe.pipelines import Pipeline
from scanpipe.pipes import federatedcode


class PublishToFederatedCode(Pipeline):
    """
    Publish package scan to FederatedCode.

    The project scan result is committed to a FederatedCode Git repository.
    The ``Project PURL`` determines both the Git repository and the exact
    directory path where the scan is stored.
    """

    download_inputs = False
    is_addon = True

    @classmethod
    def steps(cls):
        return (
            cls.check_federatedcode_eligibility,
            cls.get_package_repository,
            cls.clone_repository,
            cls.add_scan_result,
            cls.commit_and_push_changes,
            cls.delete_local_clone,
        )

    def check_federatedcode_eligibility(self):
        """Check that the project is eligible for pushing to FederatedCode."""
        federatedcode.check_federatedcode_eligibility(project=self.project)

    def get_package_repository(self):
        """Resolve the Git repository URL and scan path from the project PURL."""
        repo_url, scan_file = federatedcode.get_package_repository(
            project_purl=self.project.purl,
            logger=self.log,
        )
        self.package_git_repo = repo_url
        self.package_scan_file = scan_file

    def clone_repository(self):
        """Clone the package Git repository and keep the clone on the pipeline."""
        self.repo = federatedcode.clone_repository(
            repo_url=self.package_git_repo, logger=self.log
        )

    def add_scan_result(self):
        """Write the project scan result into the local Git repository."""
        self.relative_file_path = federatedcode.add_scan_result(
            project=self.project,
            repo=self.repo,
            package_scan_file=self.package_scan_file,
            logger=self.log,
        )

    def commit_and_push_changes(self):
        """Commit the scan result and push it to the remote repository."""
        purl = self.project.purl
        federatedcode.commit_and_push_changes(
            repo=self.repo,
            file_to_commit=str(self.relative_file_path),
            purl=purl,
            logger=self.log,
        )
        self.log(f"Scan result for '{purl}' pushed to '{self.package_git_repo}'")

    def delete_local_clone(self):
        """Delete the local clone of the repository."""
        federatedcode.delete_local_clone(repo=self.repo)
190 changes: 190 additions & 0 deletions scanpipe/pipes/federatedcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.


import logging
import shutil
import tempfile
import textwrap
from pathlib import Path
from urllib.parse import urljoin

import requests
from git import Repo
from packageurl import PackageURL

from aboutcode import hashid
from scancodeio import VERSION
from scancodeio import settings
from scanpipe.pipes.output import JSONResultsGenerator

logger = logging.getLogger(__name__)


def is_configured():
    """Return True when every required FederatedCode setting has a value."""
    required_settings = (
        settings.FEDERATEDCODE_GIT_ACCOUNT_URL,
        settings.FEDERATEDCODE_GIT_SERVICE_TOKEN,
        settings.FEDERATEDCODE_GIT_SERVICE_EMAIL,
        settings.FEDERATEDCODE_GIT_SERVICE_NAME,
    )
    return all(required_settings)


def is_available():
    """
    Return True if the configured Git account URL answers a HEAD request
    with a 200 status within 5 seconds.

    Return False when FederatedCode is not configured or when the request fails.
    """
    if not is_configured():
        return False

    account_url = settings.FEDERATEDCODE_GIT_ACCOUNT_URL
    try:
        response = requests.head(account_url, timeout=5)
        response.raise_for_status()
    except requests.exceptions.RequestException as error:
        logger.debug("FederatedCode is_available() error: %s", error)
        return False
    else:
        return response.status_code == requests.codes.ok


def get_package_repository(project_purl, logger=None):
    """
    Return a ``(git_repo_url, scan_path)`` tuple for the ``project_purl``.

    The repository name is the first component of the package base directory
    computed by ``hashid``, and the repository URL is that name joined to the
    configured Git account URL. The scan path is the
    ``<package base dir>/<version>/scancodeio.json`` location.
    """
    parsed_purl = PackageURL.from_string(project_purl)

    # Force exactly one trailing slash so urljoin appends to the account path.
    account_url = settings.FEDERATEDCODE_GIT_ACCOUNT_URL.rstrip("/") + "/"
    base_dir = hashid.get_package_base_dir(purl=project_purl)
    repo_name = base_dir.parts[0]

    scan_path = base_dir / parsed_purl.version / "scancodeio.json"
    repo_url = urljoin(account_url, f"{repo_name}.git")

    return repo_url, scan_path


def check_federatedcode_eligibility(project):
    """
    Raise an Exception when the ``project`` result cannot be pushed to
    FederatedCode.

    The project is eligible when:
    - FederatedCode is configured and available.
    - All executed pipeline runs have succeeded.
    - At least one input source has a download_url.
    - The project has a PURL that includes a version.
    """
    if not is_configured():
        raise Exception("FederatedCode is not configured.")

    if not is_available():
        raise Exception("FederatedCode Git account is not available.")

    # Both project checks are evaluated before either failure is reported.
    executed_runs = project.runs.executed()
    has_failed_run = not all(run.task_succeeded for run in executed_runs)

    input_sources = project.inputsources.all()
    has_download_url = any(source.download_url for source in input_sources)

    if has_failed_run:
        raise Exception("Make sure all the pipelines has completed successfully.")

    if not has_download_url:
        raise Exception("Project input should be download_url.")

    purl = project.purl
    if not purl:
        raise Exception("Missing Project PURL.")

    if not PackageURL.from_string(purl).version:
        raise Exception("Missing version in Project PURL.")


def clone_repository(repo_url, logger=None):
    """
    Clone the ``repo_url`` repository into a new temporary directory.

    The service token from the settings is inserted into the ``https://`` URL
    for authentication. The service name and email are stored as the commit
    identity in the repository-level Git configuration of the clone.
    Return the ``Repo`` instance of the local clone.

    The temporary directory is removed when the clone or its configuration
    fails, so that a failed run does not leave a directory behind. On success
    the caller is responsible for deleting the clone.
    """
    local_dir = tempfile.mkdtemp()

    authenticated_repo_url = repo_url.replace(
        "https://",
        f"https://{settings.FEDERATEDCODE_GIT_SERVICE_TOKEN}@",
    )

    try:
        repo = Repo.clone_from(url=authenticated_repo_url, to_path=local_dir, depth=1)

        # A single writer sets both values and is released on leaving the block.
        with repo.config_writer(config_level="repository") as config:
            config.set_value("user", "name", settings.FEDERATEDCODE_GIT_SERVICE_NAME)
            config.set_value("user", "email", settings.FEDERATEDCODE_GIT_SERVICE_EMAIL)
    except Exception:
        shutil.rmtree(local_dir, ignore_errors=True)
        raise

    return repo


def add_scan_result(project, repo, package_scan_file, logger=None):
    """
    Write the JSON results of the ``project`` into the ``repo`` working directory.

    The first component of ``package_scan_file`` is dropped to get the path of
    the file inside the repository. Missing parent directories are created.
    Return that repository-relative path.
    """
    scan_file_in_repo = Path(*package_scan_file.parts[1:])
    destination = Path(repo.working_dir).joinpath(scan_file_in_repo)
    destination.parent.mkdir(parents=True, exist_ok=True)

    json_chunks = JSONResultsGenerator(project)
    with destination.open(mode="w", encoding="utf-8") as output_file:
        output_file.writelines(json_chunks)

    return scan_file_in_repo


def commit_and_push_changes(
    repo, file_to_commit, purl, remote_name="origin", logger=None
):
    """
    Commit ``file_to_commit`` in ``repo`` and push the active branch to
    ``remote_name``.

    The commit message says "Add" when the file is currently untracked and
    "Update" otherwise, and includes a sign-off built from the configured
    service name and email.
    """
    signer_name = settings.FEDERATEDCODE_GIT_SERVICE_NAME
    signer_email = settings.FEDERATEDCODE_GIT_SERVICE_EMAIL

    change_type = "Update"
    if file_to_commit in repo.untracked_files:
        change_type = "Add"

    commit_message = textwrap.dedent(
        f"""\
        {change_type} scan result for {purl}
        Tool: pkg:github/aboutcode-org/scancode.io@v{VERSION}
        Reference: https://{settings.ALLOWED_HOSTS[0]}/
        Signed-off-by: {signer_name} <{signer_email}>
        """
    )

    branch_name = repo.active_branch.name

    repo.index.add([file_to_commit])
    repo.index.commit(commit_message)
    repo.git.push(remote_name, branch_name, "--no-verify")


def delete_local_clone(repo):
    """Delete the working directory of the local ``repo`` clone from disk."""
    clone_dir = repo.working_dir
    shutil.rmtree(clone_dir)
8 changes: 8 additions & 0 deletions scanpipe/templates/scanpipe/project_settings.html
Original file line number Diff line number Diff line change
@@ -26,6 +26,14 @@
{{ form.name }}
</div>
</div>
<div class="field">
<label class="label" for="{{ form.purl.id_for_label }}">
PURL
</label>
<div class="control">
{{ form.purl }}
</div>
</div>
keshav-space marked this conversation as resolved.
Show resolved Hide resolved
<div class="field">
<label class="label" for="{{ form.notes.id_for_label }}">
{{ form.notes.label }}
77 changes: 77 additions & 0 deletions scanpipe/tests/pipes/test_federatedcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

import shutil
import tempfile
from pathlib import Path
from unittest.mock import patch

from django.test import TestCase

import git

from scanpipe import models
from scanpipe.pipes import federatedcode
from scanpipe.tests import make_package


class ScanPipeFederatedCodeTest(TestCase):
    """Tests for the ``scanpipe.pipes.federatedcode`` functions."""

    def setUp(self):
        self.project1 = models.Project.objects.create(name="Analysis")

    def _make_local_repo(self):
        """Return a Git repository initialized in a temporary directory."""
        local_dir = tempfile.mkdtemp()
        # Registered as a cleanup so the directory is removed even when an
        # assertion fails; errors are ignored as a test may delete it itself.
        self.addCleanup(shutil.rmtree, local_dir, ignore_errors=True)
        return git.Repo.init(local_dir)

    @patch(
        "scanpipe.pipes.federatedcode.settings.FEDERATEDCODE_GIT_ACCOUNT_URL",
        "https://github.com/test/",
    )
    def test_scanpipe_pipes_federatedcode_get_package_repository(self):
        make_package(
            project=self.project1,
            package_url="pkg:npm/foobar@v1.2.3",
            version="v.1.2.3",
        )
        project_purl = "pkg:npm/foobar@v1.2.3"
        expected_git_repo = "https://github.com/test/aboutcode-packages-03f1.git"
        expected_scan_path = "aboutcode-packages-03f1/npm/foobar/v1.2.3/scancodeio.json"
        git_repo, scan_path = federatedcode.get_package_repository(
            project_purl=project_purl
        )

        self.assertEqual(expected_git_repo, git_repo)
        self.assertEqual(expected_scan_path, str(scan_path))

    def test_scanpipe_pipes_federatedcode_add_scan_result(self):
        repo = self._make_local_repo()

        federatedcode.add_scan_result(
            self.project1, repo, Path("repo/npm/foobar/v1.2.3/scancodeio.json")
        )

        self.assertIn("npm/foobar/v1.2.3/scancodeio.json", repo.untracked_files)

    def test_scanpipe_pipes_federatedcode_delete_local_clone(self):
        repo = self._make_local_repo()
        local_dir = repo.working_dir

        federatedcode.delete_local_clone(repo)

        self.assertFalse(Path(local_dir).exists())
1 change: 1 addition & 0 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
@@ -2047,6 +2047,7 @@ def test_scanpipe_webhook_subscription_model_get_payload(self):
"project": {
"name": "Analysis",
"uuid": str(self.project1.uuid),
"purl": "",
"is_archived": False,
"notes": "",
"labels": [],
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -105,6 +105,8 @@ install_requires =
bleach==6.2.0
# Antivirus
clamd==1.0.2
# FederatedCode
aboutcode.hashid==0.1.0

[options.extras_require]
dev =
@@ -146,6 +148,7 @@ scancodeio_pipelines =
map_deploy_to_develop = scanpipe.pipelines.deploy_to_develop:DeployToDevelop
match_to_matchcode = scanpipe.pipelines.match_to_matchcode:MatchToMatchCode
populate_purldb = scanpipe.pipelines.populate_purldb:PopulatePurlDB
publish_to_federatedcode = scanpipe.pipelines.publish_to_federatedcode:PublishToFederatedCode
resolve_dependencies = scanpipe.pipelines.resolve_dependencies:ResolveDependencies
scan_codebase = scanpipe.pipelines.scan_codebase:ScanCodebase
scan_for_virus = scanpipe.pipelines.scan_for_virus:ScanForVirus