Skip to content

Commit

Permalink
#253 Recognize Python packages using setup.py files
Browse files: browse the repository at this point in the history
Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
rakesh balusa authored and pombredanne committed Dec 6, 2016
1 parent 4574f3b commit 7025368
Show file tree
Hide file tree
Showing 4 changed files with 384 additions and 0 deletions.
85 changes: 85 additions & 0 deletions src/packagedcode/pypi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#
# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
# The ScanCode software is licensed under the Apache License version 2.0.
# Data generated with ScanCode require an acknowledgment.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with ScanCode or any ScanCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# ScanCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.

from __future__ import absolute_import
from __future__ import print_function

import logging
import re

from packagedcode.models import AssertedLicense
from packagedcode.models import PythonPackage

"""
Handle Python PyPI packages.
"""


logger = logging.getLogger(__name__)
# import sys
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
# logger.setLevel(logging.DEBUG)


def get_attribute(setup_location, attribute):
    """
    Return the value specified for a given 'attribute' mentioned in a 'setup.py'
    file.

    The file at `setup_location` is searched as plain text (it is never
    imported nor executed) for the first quoted string literal assigned to the
    `attribute` keyword after the start of a `setup(` call.

    Return an empty string if no such assignment is found. Return None if more
    than one `setup(` call provides the attribute, since the value is then
    ambiguous.

    Example :
        setup(
            name='requests',
            version='1.0',
        )
    'requests' is returned for the attribute 'name'
    """
    with open(setup_location, 'rb') as setup_file:
        setup_text = setup_file.read()

    # A binary read yields bytes on Python 3: decode them so that the text
    # matching below works there too. On Python 2 bytes are already `str`.
    if not isinstance(setup_text, str):
        setup_text = setup_text.decode('utf-8', 'replace')

    # Collapse the file on a single line so that '.' can span a whole call.
    setup_text = setup_text.replace('\n', '')

    # FIXME Use a valid parser for parsing 'setup.py'
    # - \b prevents matching a longer keyword that merely ends with the
    #   attribute name, such as 'long_description' for 'description'.
    # - optional whitespace is accepted around the equal sign.
    # - the value is closed by the same quote character that opened it, and
    #   does not need to be followed by a comma.
    attribute_value = (
        r'setup\(.*?\b' + re.escape(attribute) + r'\s*=\s*'
        r'(["\'])(.*?)\1'
    )
    values = re.findall(attribute_value, setup_text)
    if len(values) > 1:
        return None
    if not values:
        return ''
    _quote, value = values[0]
    return value


def parse(location):
    """
    Return a PythonPackage object built from the attributes found in the
    'setup.py' file at `location`.
    Return None if `location` does not end with 'setup.py'.
    """
    if location.endswith('setup.py'):
        # Each attribute is looked up separately in the same file.
        name = get_attribute(location, 'name')
        homepage_url = get_attribute(location, 'url')
        description = get_attribute(location, 'description')
        version = get_attribute(location, 'version')
        author = get_attribute(location, 'author')
        declared_license = AssertedLicense(
            license=get_attribute(location, 'license'))

        return PythonPackage(
            name=name,
            homepage_url=homepage_url,
            description=description,
            version=version,
            authors=[author],
            asserted_licenses=[declared_license],
        )
    return None
124 changes: 124 additions & 0 deletions tests/packagedcode/data/pypi/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from __future__ import absolute_import, print_function

import io
import os
import re
from glob import glob
from os.path import basename
from os.path import dirname
from os.path import join
from os.path import splitext

from setuptools import find_packages
from setuptools import setup


def read(*names, **kwargs):
    """
    Return the decoded text of the file found at the `names` path segments
    joined under the directory that contains this script.

    The optional `encoding` keyword argument defaults to 'utf8'.
    """
    path = join(dirname(__file__), *names)
    # Close the file as soon as it is read instead of leaking the handle.
    with io.open(path, encoding=kwargs.get('encoding', 'utf8')) as text_file:
        return text_file.read()


# Combined text handed to setuptools as the long description: the README
# first, then the CHANGELOG with its ':obj:`...`' references rewritten as
# double-backquoted literals.
_readme_text = read('README.rst')
_changelog_text = re.sub(':obj:`~?(.*?)`', r'``\1``', read('CHANGELOG.rst'))
long_description = '%s\n%s' % (_readme_text, _changelog_text)

# Declare the distribution: metadata, packages found under 'src', runtime
# requirements, optional requirement groups and console scripts.
# NOTE: this file is also read as plain text by the packagedcode pypi tests,
# so the literal text of the metadata lines below matters.
setup(
name='scancode-toolkit',
version='1.5.0',
license='Apache-2.0 with ScanCode acknowledgment and CC0-1.0 and others',
description='ScanCode is a tool to scan code for license, copyright and other interesting facts.',
long_description=long_description,
author='ScanCode',
author_email='[email protected]',
url='https://github.com/nexB/scancode-toolkit',
packages=find_packages('src'),
package_dir={'': 'src'},
py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
include_package_data=True,
zip_safe=False,
classifiers=[
# complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'License :: OSI Approved :: CC0',
'Programming Language :: Python',
'Programming Language :: Python :: 2.7',
'Topic :: Utilities',
],
keywords=[
'license', 'filetype', 'urn', 'date', 'codec',
],
# requirements are grouped by the internal package that needs them
install_requires=[
# cluecode
'py2-ipaddress >= 2.0, <3.0',
'url >= 0.1.4',
'publicsuffix2',
# TODO: upgrade to nltk==3.0.1
'nltk >= 2.0b4, <3.0.0',

# extractcode
'patch >= 1.14.2, < 1.15 ',
# to work around bug http://bugs.python.org/issue19839
# on multistream bzip2 files
'bz2file >= 0.98',

# licensedcode
'PyYAML >= 3.0, <4.0',

# textcode
'Beautifulsoup >= 3.2.0, <4.0.0',
'Beautifulsoup4 >= 4.3.0, <5.0.0',
'html5lib',
'six',

# typecode and textcode
'pygments >= 2.0.0, <3.0.0',
'pdfminer >= 20140328',

# typecode
'chardet >= 2.1.1, <3.0.0',
'binaryornot >= 0.4.0',

# scancode and AboutCode
'click >= 4.0.0, < 5.0.0',
'jinja2 >= 2.7.0, < 3.0.0',
'MarkupSafe >= 0.23',
'colorama',

# AboutCode
'about-code-tool >= 0.9.0',

# packagedcode
'requests >= 2.7.0, < 3.0.0',
],

# optional requirement groups: 'base' and 'dev'
extras_require={
'base': [
'certifi',
'setuptools',
'wheel',
'pip',
'wincertstore',
],
'dev': [
'pytest',
'execnet',
'py',
'pytest-xdist',
'bumpversion',
],

},
# console scripts mapped to their scancode entry functions
entry_points={
'console_scripts': [
'scancode = scancode.cli:scancode',
'extractcode = scancode.extract_cli:extractcode',
],
},
)
124 changes: 124 additions & 0 deletions tests/packagedcode/data/pypi/setup2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from __future__ import absolute_import, print_function

import io
import os
import re
from glob import glob
from os.path import basename
from os.path import dirname
from os.path import join
from os.path import splitext

from setuptools import find_packages
from setuptools import setup


def read(*names, **kwargs):
    """
    Return the decoded text of the file found at the `names` path segments
    joined under the directory that contains this script.

    The optional `encoding` keyword argument defaults to 'utf8'.
    """
    path = join(dirname(__file__), *names)
    # Close the file as soon as it is read instead of leaking the handle.
    with io.open(path, encoding=kwargs.get('encoding', 'utf8')) as text_file:
        return text_file.read()


# Combined text handed to setuptools as the long description: the README
# first, then the CHANGELOG with its ':obj:`...`' references rewritten as
# double-backquoted literals.
_readme_text = read('README.rst')
_changelog_text = re.sub(':obj:`~?(.*?)`', r'``\1``', read('CHANGELOG.rst'))
long_description = '%s\n%s' % (_readme_text, _changelog_text)

# Declare the distribution: metadata, packages found under 'src', runtime
# requirements, optional requirement groups and console scripts.
# NOTE: this file is also read as plain text by the packagedcode pypi tests,
# so the literal text of the metadata lines below matters.
setup(
name='scancode-toolkit',
version='1.5.0',
license='Apache-2.0 with ScanCode acknowledgment and CC0-1.0 and others',
description='ScanCode is a tool to scan code for license, copyright and other interesting facts.',
long_description=long_description,
author='ScanCode',
author_email='[email protected]',
url='https://github.com/nexB/scancode-toolkit',
packages=find_packages('src'),
package_dir={'': 'src'},
py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
include_package_data=True,
zip_safe=False,
classifiers=[
# complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'License :: OSI Approved :: CC0',
'Programming Language :: Python',
'Programming Language :: Python :: 2.7',
'Topic :: Utilities',
],
keywords=[
'license', 'filetype', 'urn', 'date', 'codec',
],
# requirements are grouped by the internal package that needs them
install_requires=[
# cluecode
'py2-ipaddress >= 2.0, <3.0',
'url >= 0.1.4',
'publicsuffix2',
# TODO: upgrade to nltk==3.0.1
'nltk >= 2.0b4, <3.0.0',

# extractcode
'patch >= 1.14.2, < 1.15 ',
# to work around bug http://bugs.python.org/issue19839
# on multistream bzip2 files
'bz2file >= 0.98',

# licensedcode
'PyYAML >= 3.0, <4.0',

# textcode
'Beautifulsoup >= 3.2.0, <4.0.0',
'Beautifulsoup4 >= 4.3.0, <5.0.0',
'html5lib',
'six',

# typecode and textcode
'pygments >= 2.0.0, <3.0.0',
'pdfminer >= 20140328',

# typecode
'chardet >= 2.1.1, <3.0.0',
'binaryornot >= 0.4.0',

# scancode and AboutCode
'click >= 4.0.0, < 5.0.0',
'jinja2 >= 2.7.0, < 3.0.0',
'MarkupSafe >= 0.23',
'colorama',

# AboutCode
'about-code-tool >= 0.9.0',

# packagedcode
'requests >= 2.7.0, < 3.0.0',
],

# optional requirement groups: 'base' and 'dev'
extras_require={
'base': [
'certifi',
'setuptools',
'wheel',
'pip',
'wincertstore',
],
'dev': [
'pytest',
'execnet',
'py',
'pytest-xdist',
'bumpversion',
],

},
# console scripts mapped to their scancode entry functions
entry_points={
'console_scripts': [
'scancode = scancode.cli:scancode',
'extractcode = scancode.extract_cli:extractcode',
],
},
)
51 changes: 51 additions & 0 deletions tests/packagedcode/test_pypi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#
# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
# The ScanCode software is licensed under the Apache License version 2.0.
# Data generated with ScanCode require an acknowledgment.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# When you publish or redistribute any data created with ScanCode or any ScanCode
# derivative work, you must accompany this data with the following acknowledgment:
#
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
# ScanCode is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.

from __future__ import absolute_import
from __future__ import print_function

import os.path

from commoncode.testcase import FileBasedTesting

from packagedcode import pypi


class TestPyPi(FileBasedTesting):
    """
    Check the 'setup.py' handling of packagedcode.pypi against the files of
    the 'pypi' test data directory.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'data')

    def test_parse(self):
        # The parsed package must carry the metadata declared in the file.
        setup_py = self.get_test_loc('pypi/setup.py')
        parsed = pypi.parse(setup_py)
        assert parsed.name == 'scancode-toolkit'
        assert parsed.version == '1.5.0'
        assert parsed.authors[0] == 'ScanCode'
        assert parsed.description == 'ScanCode is a tool to scan code for license, copyright and other interesting facts.'
        assert parsed.homepage_url == 'https://github.com/nexB/scancode-toolkit'

    def test_get_attribute(self):
        # Each attribute is extracted on its own from the same file.
        setup_py = self.get_test_loc('pypi/setup2.py')
        expected_values = [
            ('name', 'scancode-toolkit'),
            ('version', '1.5.0'),
            ('author', 'ScanCode'),
        ]
        for attribute, expected in expected_values:
            assert expected == pypi.get_attribute(setup_py, attribute)

0 comments on commit 7025368

Please sign in to comment.