Skip to content

Commit

Permalink
Add CLI option to detect generated code #377
Browse files Browse the repository at this point in the history
Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Jul 18, 2018
1 parent 0b3f119 commit 12ba2c5
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 13 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def read(*names, **kwargs):
'packages = scancode.plugin_package:PackageManifestScanner',
'emails = scancode.plugin_email:EmailScanner',
'urls = scancode.plugin_url:UrlScanner',
'generated = summarycode.generated:GeneratedCodeDetector',
],

# scancode_post_scan is the entry point for post_scan plugins executed
Expand Down
76 changes: 72 additions & 4 deletions src/summarycode/generated.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,79 @@
from __future__ import print_function
from __future__ import unicode_literals

from collections import OrderedDict
from itertools import islice

from commoncode.datautils import Boolean
from commoncode.text import toascii
from plugincode.scan import ScanPlugin
from plugincode.scan import scan_impl
from scancode import CommandLineOption
from scancode import OTHER_SCAN_GROUP
import typecode.contenttype
import commoncode.text

generated_keywords = (
"""
Tag files as generated.
"""

# Tracing flag
TRACE = False


def logger_debug(*args):
    """
    Accept any positional arguments and do nothing.

    This is the default, no-op debug logger of this module.
    """
    return None


if TRACE:
    import logging
    import sys

    logger = logging.getLogger(__name__)
    # Send log records to stdout and let DEBUG records through this logger.
    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)

    try:
        # Python 2: the text type is `unicode`.
        _text_type = unicode
    except NameError:
        # Python 3: `unicode` does not exist and `str` is the text type.
        _text_type = str

    def logger_debug(*args):
        """
        Log all `args` as a single, space-joined DEBUG message.

        Text arguments are used as-is; any other argument is replaced by its
        repr().
        """
        # Use a conditional expression rather than `cond and a or b`: the
        # latter would repr() an empty text argument because '' is falsy.
        return logger.debug(
            ' '.join(a if isinstance(a, _text_type) else repr(a) for a in args))


@scan_impl
class GeneratedCodeDetector(ScanPlugin):
    """
    Scan plugin that flags a file as likely automatically generated.
    """

    # The single attribute declared by this plugin: an `is_generated` boolean.
    attributes = OrderedDict([
        (
            'is_generated',
            Boolean(
                help='True if this file is likely an automatically generated file.',
            ),
        ),
    ])

    sort_order = 50

    # The `--generated` command line flag, off by default.
    options = [
        CommandLineOption(
            ('--generated',),
            is_flag=True,
            default=False,
            help='Classify automatically generated code files with a flag.',
            help_group=OTHER_SCAN_GROUP,
            sort_order=50,
        ),
    ]

    def is_enabled(self, generated, **kwargs):
        """
        Return the value of the `generated` option unchanged; any other
        keyword argument is ignored.
        """
        return generated

    def get_scanner(self, **kwargs):
        """
        Return the `generated_scanner` function as the scanner callable of
        this plugin.
        """
        return generated_scanner


def generated_scanner(location, **kwargs):
    """
    Return a mapping with a single `is_generated` boolean that is True when
    at least one generated code hint is found for the file at `location`.
    Extra keyword arguments are ignored.
    """
    # TODO: consider returning the "clue"
    # Only the existence of a first hint matters: any() stops right there.
    found_hint = any(True for _clue in get_generated_code_hint(location))
    return dict(is_generated=found_hint)


GENERATED_KEYWORDS = (
'generated by',
'auto-generated',
'automatically generated',
Expand Down Expand Up @@ -83,7 +150,7 @@
max_lines = 150


def generated_code(location):
def get_generated_code_hint(location, generated_keywords=GENERATED_KEYWORDS):
"""
Return a line of extracted text from a file if that file is likely
generated source code.
Expand All @@ -97,8 +164,9 @@ def generated_code(location):
return
with open(location, 'rb') as filein:
for line in islice(filein, max_lines):
text = commoncode.text.toascii(line.strip())
text = toascii(line.strip())
textl = text.lower()
if any(kw in textl for kw in generated_keywords):
# yield only the first 100 chars..
yield text[:100]

1 change: 1 addition & 0 deletions tests/scancode/data/help/help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Options:

other scans:
-i, --info Scan <input> for file information (size, checksums, etc).
--generated Classify automatically generated code files with a flag.
-e, --email Scan <input> for emails.
-u, --url Scan <input> for urls.

Expand Down
59 changes: 59 additions & 0 deletions tests/summarycode/data/generated/cli.expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"scancode_notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"scancode_options": {
"input": "<path>",
"--generated": true,
"--json-pp": "<file>"
},
"files_count": 7,
"files": [
{
"path": "simple",
"type": "directory",
"is_generated": false,
"scan_errors": []
},
{
"path": "simple/configure",
"type": "file",
"is_generated": true,
"scan_errors": []
},
{
"path": "simple/generated_1.java",
"type": "file",
"is_generated": true,
"scan_errors": []
},
{
"path": "simple/generated_2.java",
"type": "file",
"is_generated": true,
"scan_errors": []
},
{
"path": "simple/generated_3.java",
"type": "file",
"is_generated": true,
"scan_errors": []
},
{
"path": "simple/generated_4.java",
"type": "file",
"is_generated": true,
"scan_errors": []
},
{
"path": "simple/generated_5.java",
"type": "file",
"is_generated": true,
"scan_errors": []
},
{
"path": "simple/generated_6.c",
"type": "file",
"is_generated": true,
"scan_errors": []
}
]
}
29 changes: 20 additions & 9 deletions tests/summarycode/test_generated.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@
# ScanCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.

from __future__ import absolute_import, print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import os

from commoncode.testcase import FileBasedTesting
from scancode.cli_test_utils import check_json_scan
from scancode.cli_test_utils import run_scan_click
from summarycode import generated


Expand All @@ -39,22 +42,22 @@ def test_basic(self):
'for XML Binding(JAXB) Reference Implementation'
]
test_file = self.get_test_loc('generated/simple/generated_1.java')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result
test_file = self.get_test_loc('generated/simple/generated_3.java')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result

def test_basic2(self):
expected = ['* This class was generated by the JAX-WS RI.']
test_file = self.get_test_loc('generated/simple/generated_2.java')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result

def test_basic3(self):
expected = ['/* This class was automatically generated']
test_file = self.get_test_loc('generated/simple/generated_4.java')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result

def test_basic4(self):
Expand All @@ -63,27 +66,35 @@ def test_basic4(self):
'expected content contained within this class.'
]
test_file = self.get_test_loc('generated/simple/generated_5.java')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result

def test_basic5(self):
expected = ['/* DO NOT EDIT THIS FILE - it is machine generated */']
test_file = self.get_test_loc('generated/simple/generated_6.c')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result

def test_configure(self):
expected = [
'# Generated by GNU Autoconf 2.64 for Apache CouchDB 1.0.1.'
]
test_file = self.get_test_loc('generated/simple/configure')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result

def test_tomcat_jspc(self):
expected = [
'<!--Automatically created by Apache Jakarta Tomcat JspC.'
]
test_file = self.get_test_loc('generated/jspc/web.xml')
result = list(generated.generated_code(location=test_file))
result = list(generated.get_generated_code_hint(location=test_file))
assert expected == result

def test_generated_cli_option(self):
    # Run a scan of the "generated/simple" test directory with the
    # --generated option, writing the --json-pp output to a temp file, then
    # compare that output with the stored expected JSON.
    scan_dir = self.get_test_loc('generated/simple')
    actual_json = self.get_temp_file('json')
    expected_json = self.get_test_loc('generated/cli.expected.json')
    cli_args = ['--generated', '--json-pp', actual_json, scan_dir]
    run_scan_click(cli_args)
    check_json_scan(expected_json, actual_json, strip_dates=True, regen=False)

0 comments on commit 12ba2c5

Please sign in to comment.