Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report packages at top level with file level package_manifests #2710

Merged
merged 9 commits into from
Oct 8, 2021
22 changes: 21 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Changelog
Important API changes:
~~~~~~~~~~~~~~~~~~~~~~~~

- Main package API function `get_package_info` is now deprecated, and is replaced by
`get_package_manifests`.

- The data structure of the JSON output has changed for copyrights, authors
and holders: we now use proper name for attributes and not a generic "value".

Expand Down Expand Up @@ -54,7 +57,14 @@ Package detection:
- OpenWRT packages.
- Yocto/BitBake .bb recipes.

- We now support track the files of Package types.
- Major changes in package detection and reporting: a codebase-level attribute `packages`,
with one or more package_manifests and the files for each package, is now reported.
The specific changes made are:

- The resource level attribute `packages` has been renamed to `package_manifests`,
as these are really package manifests that are being detected.
- A new codebase level attribute `packages` has been added which contains package
instances created from package_manifests detected in the codebase.


Outputs:
Expand All @@ -63,6 +73,16 @@ Outputs:
- There is a new CycloneDX 1.2 output as XML and JSON.


Output version
--------------

Scancode Data Output Version is now 2.0.0.

Changes:

- rename resource level attribute `packages` to `package_manifests`.
- add codebase level attribute `packages`.


30.1.0 - 2021-09-25
--------------------
Expand Down
2 changes: 1 addition & 1 deletion src/formattedcode/output_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def collect_keys(mapping, key_group):
collect_keys(url_info, 'url')
yield url_info

for package in scanned_file.get('packages', []):
for package in scanned_file.get('package_manifests', []):
flat = flatten_package(package, path)
collect_keys(flat, 'package')
yield flat
Expand Down
4 changes: 2 additions & 2 deletions src/formattedcode/output_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def generate_output(results, version, template):

LICENSES = 'licenses'
COPYRIGHTS = 'copyrights'
PACKAGES = 'packages'
PACKAGES = 'package_manifests'

# Create a flattened data dict keyed by path
for scanned_file in results:
Expand Down Expand Up @@ -207,7 +207,7 @@ def generate_output(results, version, template):
files = {
'license_copyright': converted,
'infos': converted_infos,
'packages': converted_packages
'package_manifests': converted_packages
}

return template.generate(files=files, licenses=licenses, version=version)
Expand Down
4 changes: 2 additions & 2 deletions src/formattedcode/templates/html/template.html
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@
</table>
{% endif %}

{% if files.packages %}
{% if files.package_manifests %}
<table>
<caption>Package Information</caption>
<thead>
Expand All @@ -245,7 +245,7 @@
</tr>
</thead>
<tbody>
{% for path, data in files.packages.items() %}
{% for path, data in files.package_manifests.items() %}
{% for row in data %}
<tr>
<td>{{ path }}</td>
Expand Down
2 changes: 1 addition & 1 deletion src/packagedcode/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def get_package_root(cls, manifest_resource, codebase):
if manifest_resource.name.endswith(('pom.xml', '.pom',)):
# the root is either the parent or further up for poms stored under
# a META-INF dir
package_data = manifest_resource.packages
package_data = manifest_resource.package_manifests
if not package_data:
return manifest_resource
package_data = package_data[0]
Expand Down
33 changes: 25 additions & 8 deletions src/packagedcode/plugin_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ class PackageScanner(ScanPlugin):
"""

resource_attributes = {}
resource_attributes['packages'] = attr.ib(default=attr.Factory(list), repr=False)
codebase_attributes = {}
resource_attributes['package_manifests'] = attr.ib(default=attr.Factory(list), repr=False)
codebase_attributes['packages'] = attr.ib(default=attr.Factory(list), repr=False)

sort_order = 6

Expand Down Expand Up @@ -78,13 +80,15 @@ def get_scanner(self, **kwargs):
"""
Return a scanner callable to scan a Resource for packages.
"""
from scancode.api import get_package_info
return get_package_info
from scancode.api import get_package_manifests
return get_package_manifests

def process_codebase(self, codebase, **kwargs):
"""
Set the package root given a package "type".
"""
create_packages_from_manifests(codebase, **kwargs)

if codebase.has_single_resource:
# What if we scanned a single file and we do not have a root proper?
return
Expand All @@ -93,6 +97,19 @@ def process_codebase(self, codebase, **kwargs):
set_packages_root(resource, codebase)


def create_packages_from_manifests(codebase, **kwargs):
    """
    Populate the codebase-level `packages` attribute from the package
    manifests found on the resources of `codebase`.

    Resources are walked bottom-up (`topdown=False`) and the entries of each
    non-empty `package_manifests` list are appended, in walk order, to
    `codebase.attributes.packages`. The manifests are appended as-is.
    """
    collected_manifests = [
        manifest
        for resource in codebase.walk(topdown=False)
        # resources without any manifest contribute nothing
        for manifest in (resource.package_manifests or ())
    ]

    codebase.attributes.packages.extend(collected_manifests)


def set_packages_root(resource, codebase):
"""
Set the root_path attribute as the path to the root Resource for a given
Expand All @@ -102,25 +119,25 @@ def set_packages_root(resource, codebase):
if not resource.is_file:
return

packages = resource.packages
if not packages:
package_manifests = resource.package_manifests
if not package_manifests:
return
# NOTE: we are dealing with a single file, therefore there should be only
# a single package detected. But some package manifests can document more
# than one package at a time such as multiple arches/platforms for a gemspec
# or multiple sub packages (with "%package") in an RPM .spec file.

modified = False
for package in packages:
package_instance = get_package_instance(package)
for package_manifest in package_manifests:
package_instance = get_package_instance(package_manifest)
package_root = package_instance.get_package_root(resource, codebase)
if not package_root:
# this can happen if we scan a single resource that is a package
continue
# What if the target resource (e.g. a parent) is the root and we are in stripped root mode?
if package_root.is_root and codebase.strip_root:
continue
package['root_path'] = package_root.path
package_manifest['root_path'] = package_root.path
modified = True

if modified:
Expand Down
16 changes: 8 additions & 8 deletions src/packagedcode/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def logger_debug(*args):
"""


def recognize_packages(location):
def recognize_package_manifests(location):
"""
Return a list of Package object if any packages were recognized for this
Return a list of Package objects if any package_manifests were recognized for this
`location`, or None if there were no Packages found. Raises Exceptions on errors.
"""

Expand All @@ -67,7 +67,7 @@ def recognize_packages(location):
'fname:', filename, 'ext:', extension,
)

recognized_packages = []
recognized_package_manifests = []
for package_type in PACKAGE_TYPES:
# Note: default to True if there is nothing to match against
metafiles = package_type.metafiles
Expand All @@ -86,8 +86,8 @@ def recognize_packages(location):
'recognize_packages: recognized.license_expression:',
recognized.license_expression,
)
recognized_packages.append(recognized)
return recognized_packages
recognized_package_manifests.append(recognized)
return recognized_package_manifests

type_matched = False
if package_type.filetypes:
Expand Down Expand Up @@ -124,19 +124,19 @@ def recognize_packages(location):
if TRACE:
logger_debug('recognize_packages: recognized', recognized)

recognized_packages.append(recognized)
recognized_package_manifests.append(recognized)

except NotImplementedError:
# build a plain package if recognize is not yet implemented
recognized = package_type()
if TRACE:
logger_debug('recognize_packages: recognized', recognized)

recognized_packages.append(recognized)
recognized_package_manifests.append(recognized)

if SCANCODE_DEBUG_PACKAGE_API:
raise

return recognized_packages
return recognized_package_manifests

if TRACE: logger_debug('recognize_packages: no match for type:', package_type)
52 changes: 44 additions & 8 deletions src/scancode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,32 +287,68 @@ def _licenses_data_from_match(
SCANCODE_DEBUG_PACKAGE_API = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)


def get_package_info(location, **kwargs):
def _get_package_manifests(location):
"""
Return a mapping of package manifest information detected in the
file at `location`.
Return a mapping of package manifest information detected in the file at `location`.

Note that all exceptions are caught if there are any errors while parsing a
package manifest.
"""
from packagedcode.recognize import recognize_packages
from packagedcode.recognize import recognize_package_manifests
try:
recognized_packages = recognize_packages(location)
if recognized_packages:
return dict(packages=[package.to_dict() for package in recognized_packages])
recognized_package_manifests = recognize_package_manifests(location)
if recognized_package_manifests:
return recognized_package_manifests
except Exception as e:
if TRACE:
logger.error('get_package_info: {}: Exception: {}'.format(location, e))
logger.error('_get_package_manifests: {}: Exception: {}'.format(location, e))

if SCANCODE_DEBUG_PACKAGE_API:
raise
else:
# attention: we are swallowing ALL exceptions here!
pass


def get_package_info(location, **kwargs):
    """
    Return a mapping of package information detected in the file at `location`.

    The mapping has a single `packages` key whose value is a list with the
    `to_dict()` mapping of each recognized package manifest. The list is empty
    when nothing was recognized.

    This API function is DEPRECATED, use `get_package_manifests` instead.
    A DeprecationWarning is issued when it is called.
    """
    import warnings
    warnings.warn(
        "`get_package_info` is deprecated. Use `get_package_manifests` instead.",
        DeprecationWarning,
        # stacklevel=2 attributes the warning to the caller of this function
        # rather than to this line, so users can see which call to update.
        stacklevel=2,
    )

    recognized_packages = _get_package_manifests(location) or []
    return dict(packages=[package.to_dict() for package in recognized_packages])


def get_package_manifests(location, **kwargs):
    """
    Return a mapping of package manifest information detected in the file at
    `location`.

    The mapping has a single `package_manifests` key whose value is a list
    with the `to_dict()` mapping of each recognized package manifest. The list
    is empty when nothing was recognized.
    """
    manifests = _get_package_manifests(location)
    # a falsy result (nothing recognized) yields an empty list
    return {
        'package_manifests': [
            manifest.to_dict() for manifest in (manifests or ())
        ],
    }


def get_file_info(location, **kwargs):
"""
Return a mapping of file information collected for the file at `location`.
Expand Down
2 changes: 1 addition & 1 deletion src/scancode_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def _create_dir(location):

# See https://github.com/nexB/scancode-toolkit/issues/2653 for more information
# on the data format version
__output_format_version__ = '1.0.0'
__output_format_version__ = '2.0.0'

#
spdx_license_list_version = '3.14'
Expand Down
12 changes: 6 additions & 6 deletions src/summarycode/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,25 +169,25 @@ def process_codebase(self, codebase, classify, **kwargs):

root_path = codebase.root.path

has_packages = hasattr(codebase.root, 'packages')
if not has_packages:
has_package_manifests = hasattr(codebase.root, 'package_manifests')
if not has_package_manifests:
# FIXME: this is not correct... we may still have cases where this
# is wrong: e.g. a META-INF directory and we may not have a package
return


for resource in codebase.walk(topdown=True):
packages_info = resource.packages or []
package_manifests_info = resource.package_manifests or []

if not packages_info:
if not package_manifests_info:
continue
if not resource.has_children():
continue

descendants = None

for package_info in packages_info:
package_class = get_package_class(package_info)
for package_manifest_info in package_manifests_info:
package_class = get_package_class(package_manifest_info)
extra_root_dirs = package_class.extra_root_dirs()
extra_key_files = package_class.extra_key_files()
if TRACE:
Expand Down
2 changes: 1 addition & 1 deletion src/summarycode/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def summarize_codebase_by_facet(codebase, **kwargs):
def add_files(packages, resource):
"""
Update in-place every package mapping in the `packages` list by updating or
creatig the the "files" attribute from the `resource`. Yield back the
creating the "files" attribute from the `resource`. Yield back the
packages.
"""
for package in packages:
Expand Down
Loading