Skip to content

Commit

Permalink
Modify file level attribute packages to package_manifests
Browse files Browse the repository at this point in the history
See #2694

Signed-off-by: Ayan Sinha <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Sep 22, 2021
1 parent 891d99e commit fbb33bd
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 91 deletions.
2 changes: 1 addition & 1 deletion src/packagedcode/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def get_package_root(cls, manifest_resource, codebase):
if manifest_resource.name.endswith(('pom.xml', '.pom',)):
# the root is either the parent or further up for poms stored under
# a META-INF dir
package_data = manifest_resource.packages
package_data = manifest_resource.package_manifests
if not package_data:
return manifest_resource
package_data = package_data[0]
Expand Down
16 changes: 8 additions & 8 deletions src/packagedcode/plugin_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class PackageScanner(ScanPlugin):
"""

resource_attributes = {}
resource_attributes['packages'] = attr.ib(default=attr.Factory(list), repr=False)
resource_attributes['package_manifests'] = attr.ib(default=attr.Factory(list), repr=False)

sort_order = 6

Expand Down Expand Up @@ -78,8 +78,8 @@ def get_scanner(self, **kwargs):
"""
Return a scanner callable to scan a Resource for packages.
"""
from scancode.api import get_package_info
return get_package_info
from scancode.api import get_package_manifest_info
return get_package_manifest_info

def process_codebase(self, codebase, **kwargs):
"""
Expand All @@ -102,25 +102,25 @@ def set_packages_root(resource, codebase):
if not resource.is_file:
return

packages = resource.packages
if not packages:
package_manifests = resource.package_manifests
if not package_manifests:
return
# NOTE: we are dealing with a single file, therefore there should only be
# a single package detected. But some package manifests can document more
# than one package at a time, such as multiple arches/platforms for a gemspec
# or multiple sub-packages (with "%package") in an RPM .spec file.

modified = False
for package in packages:
package_instance = get_package_instance(package)
for package_manifest in package_manifests:
package_instance = get_package_instance(package_manifest)
package_root = package_instance.get_package_root(resource, codebase)
if not package_root:
# this can happen if we scan a single resource that is a package
continue
# What if the target resource (e.g. a parent) is the root and we are in stripped root mode?
if package_root.is_root and codebase.strip_root:
continue
package['root_path'] = package_root.path
package_manifest['root_path'] = package_root.path
modified = True

if modified:
Expand Down
16 changes: 8 additions & 8 deletions src/packagedcode/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def logger_debug(*args):
"""


def recognize_packages(location):
def recognize_package_manifests(location):
"""
Return a list of Package object if any packages were recognized for this
Return a list of Package objects if any package_manifests were recognized for this
`location`, or None if there were no Packages found. Raises Exceptions on errors.
"""

Expand All @@ -67,7 +67,7 @@ def recognize_packages(location):
'fname:', filename, 'ext:', extension,
)

recognized_packages = []
recognized_package_manifests = []
for package_type in PACKAGE_TYPES:
# Note: default to True if there is nothing to match against
metafiles = package_type.metafiles
Expand All @@ -86,8 +86,8 @@ def recognize_packages(location):
'recognize_packages: recognized.license_expression:',
recognized.license_expression,
)
recognized_packages.append(recognized)
return recognized_packages
recognized_package_manifests.append(recognized)
return recognized_package_manifests

type_matched = False
if package_type.filetypes:
Expand Down Expand Up @@ -124,19 +124,19 @@ def recognize_packages(location):
if TRACE:
logger_debug('recognize_packages: recognized', recognized)

recognized_packages.append(recognized)
recognized_package_manifests.append(recognized)

except NotImplementedError:
# build a plain package if recognize is not yet implemented
recognized = package_type()
if TRACE:
logger_debug('recognize_packages: recognized', recognized)

recognized_packages.append(recognized)
recognized_package_manifests.append(recognized)

if SCANCODE_DEBUG_PACKAGE_API:
raise

return recognized_packages
return recognized_package_manifests

if TRACE: logger_debug('recognize_packages: no match for type:', package_type)
15 changes: 9 additions & 6 deletions src/scancode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,19 +287,22 @@ def _licenses_data_from_match(
SCANCODE_DEBUG_PACKAGE_API = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)


def get_package_info(location, **kwargs):
def get_package_manifest_info(location, **kwargs):
"""
Return a mapping of package manifest information detected in the
file at `location`.
Note that all exceptions are caught if there are any errors while parsing a
package manifest.
"""
from packagedcode.recognize import recognize_packages
from packagedcode.recognize import recognize_package_manifests
try:
recognized_packages = recognize_packages(location)
if recognized_packages:
return dict(packages=[package.to_dict() for package in recognized_packages])
recognized_package_manifests = recognize_package_manifests(location)
if recognized_package_manifests:
return dict(package_manifests=[
package_manifest.to_dict()
for package_manifest in recognized_package_manifests
])
except Exception as e:
if TRACE:
logger.error('get_package_info: {}: Exception: {}'.format(location, e))
Expand All @@ -310,7 +313,7 @@ def get_package_info(location, **kwargs):
# attention: we are swallowing ALL exceptions here!
pass

return dict(packages=[])
return dict(package_manifests=[])


def get_file_info(location, **kwargs):
Expand Down
12 changes: 6 additions & 6 deletions src/summarycode/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,25 +169,25 @@ def process_codebase(self, codebase, classify, **kwargs):

root_path = codebase.root.path

has_packages = hasattr(codebase.root, 'packages')
if not has_packages:
has_package_manifests = hasattr(codebase.root, 'package_manifests')
if not has_package_manifests:
# FIXME: this is not correct... we may still have cases where this
# is wrong: e.g. a META-INF directory and we may not have a package
return


for resource in codebase.walk(topdown=True):
packages_info = resource.packages or []
package_manifests_info = resource.package_manifests or []

if not packages_info:
if not package_manifests_info:
continue
if not resource.has_children():
continue

descendants = None

for package_info in packages_info:
package_class = get_package_class(package_info)
for package_manifest_info in package_manifests_info:
package_class = get_package_class(package_manifest_info)
extra_root_dirs = package_class.extra_root_dirs()
extra_key_files = package_class.extra_key_files()
if TRACE:
Expand Down
59 changes: 30 additions & 29 deletions src/summarycode/plugin_consolidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,21 +98,21 @@ def to_dict(self, **kwargs):


@attr.s
class ConsolidatedPackage(object):
package = attr.ib()
class ConsolidatedPackageManifest(object):
package_manifest = attr.ib()
consolidation = attr.ib()

def to_dict(self, **kwargs):
package = self.package.to_dict()
package.update(self.consolidation.to_dict())
return package
package_manifest = self.package_manifest.to_dict()
package_manifest.update(self.consolidation.to_dict())
return package_manifest


@post_scan_impl
class Consolidator(PostScanPlugin):
"""
A ScanCode post-scan plugin to return consolidated components and consolidated
packages for different types of codebase summarization.
package_manifests for different types of codebase summarization.
A consolidated component is a group of Resources that have the same origin.
Currently, a ConsolidatedComponent is created for each detected copyright holder
Expand All @@ -127,7 +127,7 @@ class Consolidator(PostScanPlugin):
"""
codebase_attributes = dict(
consolidated_components=attr.ib(default=attr.Factory(list)),
consolidated_packages=attr.ib(default=attr.Factory(list))
consolidated_package_manifests=attr.ib(default=attr.Factory(list))
)

resource_attributes = dict(
Expand All @@ -140,9 +140,10 @@ class Consolidator(PostScanPlugin):
PluggableCommandLineOption(('--consolidate',),
is_flag=True, default=False,
help='Group resources by Packages or license and copyright holder and '
'return those groupings as a list of consolidated packages and '
'return those groupings as a list of consolidated package_manifests and '
'a list of consolidated components. '
'This requires the scan to have/be run with the copyright, license, and package options active',
'This requires the scan to have/be run with the copyright, license, and '
'package options active',
help_group=POST_SCAN_GROUP
)
]
Expand All @@ -151,12 +152,12 @@ def is_enabled(self, consolidate, **kwargs):
return consolidate

def process_codebase(self, codebase, **kwargs):
# Collect ConsolidatedPackages and ConsolidatedComponents
# Collect ConsolidatedPackageManifests and ConsolidatedComponents
# TODO: Have a "catch-all" Component for the things that we haven't grouped
consolidations = []
root = codebase.root
if hasattr(root, 'packages') and hasattr(root, 'copyrights') and hasattr(root, 'licenses'):
consolidations.extend(get_consolidated_packages(codebase))
if hasattr(root, 'package_manifests') and hasattr(root, 'copyrights') and hasattr(root, 'licenses'):
consolidations.extend(get_consolidated_package_manifests(codebase))
if hasattr(root, 'copyrights') and hasattr(root, 'licenses'):
consolidations.extend(get_holders_consolidated_components(codebase))

Expand All @@ -166,24 +167,24 @@ def process_codebase(self, codebase, **kwargs):
# Sort consolidations by holders for consistent ordering before enumeration
consolidations = sorted(consolidations, key=lambda c: '_'.join(h.key for h in c.consolidation.core_holders))

# Add ConsolidatedPackages and ConsolidatedComponents to top-level codebase attributes
codebase.attributes.consolidated_packages = consolidated_packages = []
# Add ConsolidatedPackageManifests and ConsolidatedComponents to top-level codebase attributes
codebase.attributes.consolidated_package_manifests = consolidated_package_manifests = []
codebase.attributes.consolidated_components = consolidated_components = []
identifier_counts = Counter()
for index, c in enumerate(consolidations, start=1):
# Skip consolidation if it does not have any Files
if c.consolidation.files_count == 0:
continue
if isinstance(c, ConsolidatedPackage):
if isinstance(c, ConsolidatedPackageManifest):
# We use the purl as the identifier for ConsolidatedPackages
purl = c.package.purl
purl = c.package_manifest.purl
identifier_counts[purl] += 1
identifier = python_safe_name('{}_{}'.format(purl, identifier_counts[purl]))
c.consolidation.identifier = identifier
for resource in c.consolidation.resources:
resource.consolidated_to.append(identifier)
resource.save(codebase)
consolidated_packages.append(c.to_dict())
consolidated_package_manifests.append(c.to_dict())
elif isinstance(c, ConsolidatedComponent):
consolidation_identifier = c.consolidation.identifier
if consolidation_identifier:
Expand Down Expand Up @@ -218,20 +219,20 @@ def process_codebase(self, codebase, **kwargs):
resource.save(codebase)


def get_consolidated_packages(codebase):
def get_consolidated_package_manifests(codebase):
"""
Yield a ConsolidatedPackage for each detected package in the codebase
Yield a ConsolidatedPackageManifest for each detected package_manifest in the codebase
"""
for resource in codebase.walk(topdown=False):
for package_data in resource.packages:
package = get_package_instance(package_data)
package_root = package.get_package_root(resource, codebase)
for package_manifest_data in resource.package_manifests:
package_manifest = get_package_instance(package_manifest_data)
package_root = package_manifest.get_package_root(resource, codebase)
package_root.extra_data['package_root'] = True
package_root.save(codebase)
is_build_file = isinstance(package, BaseBuildManifestPackage)
package_resources = list(package.get_package_resources(package_root, codebase))
package_license_expression = package.license_expression
package_copyright = package.copyright
is_build_file = isinstance(package_manifest, BaseBuildManifestPackage)
package_resources = list(package_manifest.get_package_resources(package_root, codebase))
package_license_expression = package_manifest.license_expression
package_copyright = package_manifest.copyright

package_holders = []
if package_copyright:
Expand Down Expand Up @@ -273,14 +274,14 @@ def get_consolidated_packages(codebase):
resources=package_resources,
)
if is_build_file:
c.identifier = package.name
c.identifier = package_manifest.name
yield ConsolidatedComponent(
type='build',
consolidation=c
)
else:
yield ConsolidatedPackage(
package=package,
yield ConsolidatedPackageManifest(
package_manifest=package_manifest,
consolidation=c
)

Expand Down
16 changes: 8 additions & 8 deletions src/summarycode/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,31 +166,31 @@ def compute_license_score(codebase):

def get_declared_license_keys(codebase):
"""
Return a list of declared license keys found in packages and key files.
Return a list of declared license keys found in package_manifests and key files.
"""
return (
get_declared_license_keys_in_key_files(codebase) +
get_declared_license_keys_in_packages(codebase)
get_declared_license_keys_in_package_manifests(codebase)
)


def get_declared_license_keys_in_packages(codebase):
def get_declared_license_keys_in_package_manifests(codebase):
"""
Return a list of declared license keys found in packages.
Return a list of declared license keys found in package_manifests.
A package manifest (such as Maven POM file or an npm package.json file)
contains structured declared license information. This is further normalized
as a license_expression. We extract the list of licenses from the normalized
license expressions.
"""
packages = chain.from_iterable(
getattr(res, 'packages', []) or []
package_manifests = chain.from_iterable(
getattr(res, 'package_manifests', []) or []
for res in codebase.walk(topdown=True))

licensing = Licensing()
detected_good_licenses = []
for package in packages:
expression = package.get('license_expression')
for package_manifest in package_manifests:
expression = package_manifest.get('license_expression')
if expression:
exp = licensing.parse(
expression, validate=False, strict=False, simple=True)
Expand Down
Loading

0 comments on commit fbb33bd

Please sign in to comment.