Merge branch 'main' into exclusion-framework-ruby

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
aboutcode-org · Feb 3, 2025 · d9afb6c · d9afb6c
2 parents a185d7d + d9f50b1
commit d9afb6c
Show file tree

Hide file tree

Showing 62 changed files with 1,278 additions and 316 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,7 +1,17 @@
 Changelog
 =========
 
-v34.9.4 (unreleased)
+v34.9.5 (unreleased)
+--------------------
+
+- Add support for the XLSX report in REST API.
+  https://github.com/aboutcode-org/scancode.io/issues/1524
+
+- Add options to the Project reset action.
+  Also, the Project labels are kept during reset.
+  https://github.com/aboutcode-org/scancode.io/issues/1568
+
+v34.9.4 (2025-01-21)
 --------------------
 
 - Improve Project list page navigation.
@@ -47,6 +57,27 @@ v34.9.4 (unreleased)
   sheets with a dedicated VULNERABILITIES sheet.
   https://github.com/aboutcode-org/scancode.io/issues/1519
 
+- Keep the InputSource objects when using ``reset`` on Projects.
+  https://github.com/aboutcode-org/scancode.io/issues/1536
+
+- Add a ``report`` management command that generates XLSX reports for
+  multiple projects at once using labels and searching by project name.
+  https://github.com/aboutcode-org/scancode.io/issues/1524
+
+- Add the ability to "select across" in the Projects list when using the "select all"
+  checkbox on a paginated list.
+  https://github.com/aboutcode-org/scancode.io/issues/1524
+
+- Update scancode-toolkit to v32.3.2. See CHANGELOG for updates:
+  https://github.com/aboutcode-org/scancode-toolkit/releases/tag/v32.3.2
+  https://github.com/aboutcode-org/scancode-toolkit/releases/tag/v32.3.1
+
+- Add a project setting ``scan_max_file_size`` and a scancode.io settings field
+  ``SCANCODEIO_SCAN_MAX_FILE_SIZE`` to skip scanning files above a certain
+  file size (in bytes) as a temporary fix for large memory spikes while
+  scanning for licenses in certain large files.
+  https://github.com/aboutcode-org/scancode-toolkit/issues/3711
+
 v34.9.3 (2024-12-31)
 --------------------
 

diff --git a/docs/application-settings.rst b/docs/application-settings.rst
@@ -165,6 +165,18 @@ The value unit is second and is defined as an integer::
 
 Default: ``120`` (2 minutes)
 
+SCANCODEIO_SCAN_MAX_FILE_SIZE
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Maximum file size allowed for a file to be scanned when scanning a codebase.
+
+The value unit is bytes and is defined as an integer, see the following
+example of setting this at 5 MB::
+
+    SCANCODEIO_SCAN_MAX_FILE_SIZE=5242880
+
+Default: ``None`` (all files will be scanned)
+
 .. _scancodeio_settings_pipelines_dirs:
 
 SCANCODEIO_PIPELINES_DIRS

diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst
@@ -68,6 +68,7 @@ ScanPipe's own commands are listed under the ``[scanpipe]`` section::
       list-project
       output
       purldb-scan-worker
+      report
       reset-project
       run
       show-pipeline
@@ -174,6 +175,10 @@ Required arguments (one of):
   | project-2      | pkg:deb/debian/[email protected]      |
   +----------------+---------------------------------+
 
+.. tip::
+    In place of a local path, a download URL to the CSV file is supported for the
+    ``--input-list`` argument.
+
 Optional arguments:
 
 - ``--project-name-suffix`` Optional custom suffix to append to project names.
@@ -194,14 +199,15 @@ Optional arguments:
 Example: Processing Multiple Docker Images
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Assume multiple Docker images are available in a directory named ``local-data/`` on
+Suppose you have multiple Docker images stored in a directory named ``local-data/`` on
 the host machine.
-To process these images with the ``analyze_docker_image`` pipeline using asynchronous
-execution::
+To process these images using the ``analyze_docker_image`` pipeline with asynchronous
+execution, you can use this command::
 
     $ docker compose run --rm \
-        --volume local-data/:/input-data:ro \
-        web scanpipe batch-create input-data/ \
+        --volume local-data/:/input-data/:ro \
+        web scanpipe batch-create \
+            --input-directory /input-data/ \
             --pipeline analyze_docker_image \
             --label "Docker" \
             --execute --async
@@ -224,6 +230,19 @@ Each Docker image in the ``local-data/`` directory will result in the creation o
 project with the specified pipeline (``analyze_docker_image``) executed by worker
 services.
 
+Example: Processing Multiple Develop to Deploy Mappings
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To process an input list CSV file with the ``map_deploy_to_develop`` pipeline using
+asynchronous execution::
+
+    $ docker compose run --rm \
+        web scanpipe batch-create \
+            --input-list https://url/input_list.csv \
+            --pipeline map_deploy_to_develop \
+            --label "d2d_mapping" \
+            --execute --async
+
 `$ scanpipe list-pipeline [--verbosity {0,1,2,3}]`
 --------------------------------------------------
 
@@ -375,6 +394,46 @@ your outputs on the host machine when running with Docker.
 .. tip:: To specify a CycloneDX spec version (default to latest), use the syntax
   ``cyclonedx:VERSION`` as format value. For example: ``--format cyclonedx:1.5``.
 
+.. _cli_report:
+
+`$ scanpipe report --model MODEL`
+---------------------------------
+
+Generates an XLSX report of selected projects based on the provided criteria.
+
+Required arguments:
+
+- ``--model {package,dependency,resource,relation,message,todo}``
+  Specifies the model to include in the XLSX report. Available choices are based on
+  predefined object types.
+
+Optional arguments:
+
+- ``--output-directory OUTPUT_DIRECTORY``
+  The path to the directory where the report file will be created. If not provided,
+  the report file will be created in the current working directory.
+
+- ``--search SEARCH``
+  Filter projects by searching for the provided string in their name.
+
+- ``--label LABELS``
+  Filter projects by the provided label(s). Multiple labels can be provided by using
+  this argument multiple times.
+
+.. note::
+    Either ``--label`` or ``--search`` must be provided to select projects.
+
+Example usage:
+
+1. Generate a report for all projects tagged with "d2d" and include the **TODOS**
+   worksheet::
+
+      $ scanpipe report --model todo --label d2d
+
+2. Generate a report for projects whose names contain the word "audit" and include the
+   **PACKAGES** worksheet::
+
+      $ scanpipe report --model package --search audit
 
 .. _cli_check_compliance:
 

diff --git a/docs/installation.rst b/docs/installation.rst
@@ -35,6 +35,10 @@ Build the Image
 ScanCode.io is distributed with ``Dockerfile`` and ``docker-compose.yml`` files
 required for the creation of the Docker image.
 
+.. note::
+    On **Windows**, make sure to use **WSL** (Windows Subsystem for Linux) for
+    the installation process.
+
 .. warning:: On **Windows**, ensure that git ``autocrlf`` configuration is set to
    ``false`` before cloning the repository::
 

diff --git a/docs/project-configuration.rst b/docs/project-configuration.rst
@@ -54,6 +54,7 @@ Content of a ``scancode-config.yml`` file:
     ignored_patterns:
      - '*.tmp'
      - 'tests/*'
+    scan_max_file_size: 5242880
     ignored_dependency_scopes:
      - package_type: npm
        scope: devDependencies
@@ -86,6 +87,24 @@ product_version
 
 The product version of this project, as specified within the DejaCode application.
 
+scan_max_file_size
+^^^^^^^^^^^^^^^^^^
+
+Maximum file size allowed for a file to be scanned when scanning a codebase.
+
+The value unit is bytes and is defined as an integer, see the following
+example of setting this at 5 MB::
+
+    scan_max_file_size: 5242880
+
+Default is ``None``, in which case all files will be scanned.
+
+.. note::
+    This is the same as the scancodeio setting ``SCANCODEIO_SCAN_MAX_FILE_SIZE``
+    set using the .env file, and the project setting ``scan_max_file_size`` takes
+    precedence over the scancodeio setting ``SCANCODEIO_SCAN_MAX_FILE_SIZE``.
+
+
 ignored_patterns
 ^^^^^^^^^^^^^^^^
 

diff --git a/docs/rest-api.rst b/docs/rest-api.rst
@@ -587,3 +587,40 @@ This action deletes a "not started" or "queued" pipeline run.
     {
         "status": "Pipeline pipeline_name deleted."
     }
+
+XLSX Report
+-----------
+
+Generates an XLSX report for selected projects based on specified criteria. The
+``model`` query parameter is required to determine the type of data to include in the
+report.
+
+Endpoint:
+``GET /api/projects/report/?model=MODEL``
+
+Parameters:
+
+- ``model``: Defines the type of data to include in the report.
+  Accepted values: ``package``, ``dependency``, ``resource``, ``relation``, ``message``,
+  ``todo``.
+
+.. note::
+
+   You can apply any available filters to select the projects to include in the
+   report. Filters can be based on project attributes, such as a substring in the
+   name or specific labels.
+
+Example Usage:
+
+1. Generate a report for projects tagged with "d2d" and include the ``TODOS`` worksheet:
+
+   .. code-block::
+
+      GET /api/projects/report/?model=todo&label=d2d
+
+2. Generate a report for projects whose names contain "audit" and include the
+   ``PACKAGES`` worksheet:
+
+   .. code-block::
+
+      GET /api/projects/report/?model=package&name__contains=audit
diff --git a/scancodeio/__init__.py b/scancodeio/__init__.py
@@ -28,7 +28,7 @@
 
 import git
 
-VERSION = "34.9.3"
+VERSION = "34.9.4"
 
 PROJECT_DIR = Path(__file__).resolve().parent
 ROOT_DIR = PROJECT_DIR.parent

diff --git a/scancodeio/settings.py b/scancodeio/settings.py
@@ -100,6 +100,9 @@
 # Default to 2 minutes.
 SCANCODEIO_SCAN_FILE_TIMEOUT = env.int("SCANCODEIO_SCAN_FILE_TIMEOUT", default=120)
 
+# Default to None which scans all files
+SCANCODEIO_SCAN_MAX_FILE_SIZE = env.int("SCANCODEIO_SCAN_MAX_FILE_SIZE", default=None)
+
 # List views pagination, controls the number of items displayed per page.
 # Syntax in .env: SCANCODEIO_PAGINATE_BY=project=10,project_error=10
 SCANCODEIO_PAGINATE_BY = env.dict(

diff --git a/scancodeio/static/main.js b/scancodeio/static/main.js
@@ -232,7 +232,8 @@ function setupSelectCheckbox() {
       updateButtonAndDropdownState();
 
       // Check if all row checkboxes are checked and update the "Select All" checkbox accordingly
-      selectAllCheckbox.checked = Array.from(rowCheckboxes).every((cb) => cb.checked);
+      const allRowCheckboxesChecked = Array.from(rowCheckboxes).every((cb) => cb.checked);
+      selectAllCheckbox.checked = allRowCheckboxesChecked;
     });
   });
 

diff --git a/scanpipe/api/views.py b/scanpipe/api/views.py
@@ -52,6 +52,7 @@
 from scanpipe.models import Project
 from scanpipe.models import Run
 from scanpipe.models import RunInProgressError
+from scanpipe.pipes import filename_now
 from scanpipe.pipes import output
 from scanpipe.pipes.compliance import get_project_compliance_alerts
 from scanpipe.views import project_results_json_response
@@ -79,6 +80,11 @@ class ProjectFilterSet(django_filters.rest_framework.FilterSet):
         method="filter_names",
     )
     uuid = django_filters.CharFilter()
+    label = django_filters.CharFilter(
+        label="Label",
+        field_name="labels__slug",
+        distinct=True,
+    )
 
     class Meta:
         model = Project
@@ -90,6 +96,7 @@ class Meta:
             "names",
             "uuid",
             "is_archived",
+            "label",
         ]
 
     def filter_names(self, qs, name, value):
@@ -195,6 +202,40 @@ def pipelines(self, request, *args, **kwargs):
         ]
         return Response(pipeline_data)
 
+    @action(detail=False)
+    def report(self, request, *args, **kwargs):
+        """Return an XLSX report for the filtered projects and ``model`` type."""
+        project_qs = self.filter_queryset(self.get_queryset())
+        model_choices = list(output.object_type_to_model_name.keys())
+        model = request.GET.get("model")
+        if not model:
+            message = {
+                "error": (
+                    "Specifies the model to include in the XLSX report. "
+                    "Using: ?model=MODEL"
+                ),
+                "choices": ", ".join(model_choices),
+            }
+            return Response(message, status=status.HTTP_400_BAD_REQUEST)
+
+        if model not in model_choices:
+            message = {
+                "error": f"{model} is not one of the valid choices",
+                "choices": ", ".join(model_choices),
+            }
+            return Response(message, status=status.HTTP_400_BAD_REQUEST)
+
+        output_file = output.get_xlsx_report(
+            project_qs=project_qs,
+            model_short_name=model,
+        )
+        output_file.seek(0)  # Rewind before handing the file to the response.
+        return FileResponse(
+            output_file,
+            filename=f"scancodeio-report-{filename_now()}.xlsx",
+            as_attachment=True,
+        )
+
     def get_filtered_response(
         self, request, queryset, filterset_class, serializer_class
     ):