Skip to content

Commit

Permalink
Improved Maven POM detection
Browse files Browse the repository at this point in the history
 * rename class to MavenPomPackage for clarity
 * fix the metafiles and the recognition for #631
 * add additional api and recognize tests
 * clean up the API doc and other minor refactorings

Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Jun 23, 2017
1 parent ce2eff8 commit a79dd9d
Show file tree
Hide file tree
Showing 511 changed files with 755 additions and 566 deletions.
2 changes: 1 addition & 1 deletion src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
models.JavaJar,
models.JavaEar,
models.JavaWar,
maven.MavenJar,
maven.MavenPomPackage,
models.IvyJar,
models.JBossSar,
models.Axis2Mar,
Expand Down
12 changes: 7 additions & 5 deletions src/packagedcode/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@
Attempts to resolve Maven properties when possible.
"""


class MavenJar(models.JavaJar):
metafiles = ('META-INF/**/*.pom', 'pom.xml',)
class MavenPomPackage(models.Package):
metafiles = ('.pom', 'pom.xml',)
extensions = ('.pom','.xml', )
repo_types = (models.repo_maven,)
type = models.StringType(default='Apache Maven')
type = models.StringType(default='Apache Maven POM')
packaging = models.StringType(default=models.as_archive)
primary_language = models.StringType(default='Java')

@classmethod
def recognize(cls, location):
Expand Down Expand Up @@ -650,7 +652,7 @@ def parse(location):
))

# FIXME: there are still a lot of other data to map in a Package
package = MavenJar(
package = MavenPomPackage(
location=location,
name='{group_id}:{artifact_id}'.format(**pom),
version=pom['version'],
Expand Down
2 changes: 1 addition & 1 deletion src/packagedcode/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"""


def recognize_packaged_archives(location):
def recognize_package(location):
"""
Return a Package object if one was recognized or None for this `location`.
"""
Expand Down
57 changes: 25 additions & 32 deletions src/scancode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,19 @@

def extract_archives(location, recurse=True):
"""
Extract recursively any archives found at location and yield an iterable of
ExtractEvents.
If verbose is False, only the "done" event is returned at extraction
completion.
If verbose is True, both "start" and "done" events are returned.
Extract any archives found at `location` and yield ExtractEvents. If
`recurse` is True, extracts nested archives-in-archives
recursively.
"""
from extractcode.extract import extract
from extractcode import default_kinds

for xevent in extract(location, kinds=default_kinds, recurse=recurse):
yield xevent


def get_copyrights(location):
"""
Yield an iterable of dictionaries of copyright data detected in the file at
location. Each item contains a list of copyright statements and a start and
end line.
Yield mappings of copyright data detected in the file at `location`.
"""
from cluecode.copyrights import detect_copyrights

Expand All @@ -72,8 +67,7 @@ def get_copyrights(location):

def get_emails(location):
"""
Yield an iterable of dictionaries of emails detected in the file at
location.
Yield mappings of emails detected in the file at `location`.
"""
from cluecode.finder import find_emails
for email, line_num in find_emails(location):
Expand All @@ -88,8 +82,7 @@ def get_emails(location):

def get_urls(location):
"""
Yield an iterable of dictionaries of urls detected in the file at
location.
Yield mappings of urls detected in the file at `location`.
"""
from cluecode.finder import find_urls
for urls, line_num in find_urls(location):
Expand All @@ -108,14 +101,18 @@ def get_urls(location):

def get_licenses(location, min_score=0, include_text=False, diag=False):
"""
Yield dictionaries of license data detected in the file at location.
Yield mappings of license data detected in the file at `location`.
`min_score` is a minimum score threshold from 0 to 100. The
default of 0 means that all license matches will be returned. With
any other value, matches that have a score below the minimum score
will not be returned.
`min_score` is a minimum score threshold from 0 to 100. The default of 0
means that all license matches will be returned. With any other value, matches
that have a score below the minimum score will not be returned.
If `include_text` is True, the matched text is included in the
returned data.
If `diag` is True, additional match details are returned with the matched_rule
key of the returned mapping.
If `diag` is True, additional match details are returned with the
matched_rule key of the returned mapping.
"""
from licensedcode.cache import get_index
from licensedcode.cache import get_licenses_db
Expand Down Expand Up @@ -164,15 +161,14 @@ def get_licenses(location, min_score=0, include_text=False, diag=False):
yield result


def get_file_infos(location, as_list=True):
def get_file_infos(location):
"""
Return a list of dictionaries of informations collected from the file or
directory at location.
Return a mapping of file information collected from the file or
directory at `location`.
"""
from commoncode import fileutils
from commoncode import filetype
from commoncode.hash import multi_checksums
from scancode import utils
from typecode import contenttype

infos = OrderedDict()
Expand All @@ -198,16 +194,13 @@ def get_file_infos(location, as_list=True):
infos['is_source'] = bool(is_file and T.is_source)
infos['is_script'] = bool(is_file and T.is_script)

if as_list:
return [infos]
else:
return infos
return infos


# FIXME: this smells bad
def _empty_file_infos():
"""
Return an empty mapping of info key/values, used in case of failure.
Return an empty mapping of file info, used in case of failure.
"""
infos = OrderedDict()
infos['type'] = None
Expand All @@ -232,11 +225,11 @@ def _empty_file_infos():

def get_package_infos(location):
"""
Return a list of dictionaries of package information
collected from the location or an empty list.
Return a list of mappings of package information collected from the
`location` or an empty list.
"""
from packagedcode.recognize import recognize_packaged_archives
package = recognize_packaged_archives(location)
from packagedcode.recognize import recognize_package
package = recognize_package(location)
if not package:
return []
return [package.to_dict()]
2 changes: 1 addition & 1 deletion src/scancode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ def scan_infos(input_file, diag=False):
"""
errors = []
try:
infos = get_file_infos(input_file, as_list=False)
infos = get_file_infos(input_file)
except Exception as e:
# never fail but instead add an error message.
infos = _empty_file_infos()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "ant:ant-launcher",
"version": "1.6.5",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "ant:ant",
"version": "1.6.5",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "antlr:antlr",
"version": "2.7.6",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "aopalliance:aopalliance",
"version": "1.0",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "asm:asm-attrs",
"version": "1.5.3",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "asm:asm-parent",
"version": "3.0",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "asm:asm",
"version": "1.5.3",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "asm:asm",
"version": "3.0",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "aspectj:aspectjrt",
"version": "1.5.3",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "aspectj:aspectjrt",
"version": "1.5.4",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "aspectj:aspectjtools",
"version": "1.5.4",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "aspectj:aspectjweaver",
"version": "1.5.4",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "avalon-framework:avalon-framework",
"version": "4.1.3",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "bcel:bcel",
"version": "5.1",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-beanutils:commons-beanutils",
"version": "1.6",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-beanutils:commons-beanutils",
"version": "1.7.0",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "biz.aQute:bndlib",
"version": "0.0.203",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "biz.aQute:bndlib",
"version": "0.0.238",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "c3p0:c3p0",
"version": "0.9.0.4",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "cglib:cglib-nodep",
"version": "2.1_3",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "cglib:cglib",
"version": "2.1_3",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "classworlds:classworlds",
"version": "1.1-alpha-2",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "classworlds:classworlds",
"version": "1.1",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-cli:commons-cli",
"version": "1.0",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-codec:commons-codec",
"version": "1.3",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-collections:commons-collections",
"version": "2.0",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-collections:commons-collections",
"version": "2.1",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-collections:commons-collections",
"version": "3.1",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "commons-collections:commons-collections",
"version": "3.2",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "com.google.collect:com.ss.c.g.common.collect",
"version": "0.5.0.alpha",
"primary_language": "Java",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"type": "Apache Maven",
"type": "Apache Maven POM",
"name": "com.jcraft:jsch",
"version": "0.1.23",
"primary_language": "Java",
Expand Down
Loading

0 comments on commit a79dd9d

Please sign in to comment.