Skip to content

Commit

Permalink
Add new parser2 function for pypi #253
Browse files Browse the repository at this point in the history
Signed-off-by: Li <[email protected]>
  • Loading branch information
Li committed Jun 3, 2019
1 parent 794a684 commit d7ebe5a
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 28 deletions.
61 changes: 36 additions & 25 deletions src/packagedcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from pkginfo import UnpackedSDist
from pkginfo import Wheel

from commoncode import filetype
from commoncode import fileutils
from packagedcode import models
from packagedcode.utils import build_description
Expand Down Expand Up @@ -331,7 +332,25 @@ def parse(location):


def parse2(location):
    """
    Parse the Python package at `location` using the pkginfo-based parsers
    according to its file type and return a package, or None when
    `location` is not a recognized Python package file.

    A directory is handed to parse_unpackaged_source. A file is dispatched
    on its file name: `setup.py` is matched on the whole name and archives
    are matched on their file name suffix.
    """
    if filetype.is_dir(location):
        return parse_unpackaged_source(location)

    file_name = fileutils.file_name(location)

    # Match setup.py on the whole name: a suffix test would also accept
    # unrelated files such as `my_setup.py`.
    if file_name == 'setup.py':
        return parse_unpackaged_source(location)

    # An ordered sequence keeps the dispatch order explicit rather than
    # relying on dict iteration order.
    parsers_by_suffix = (
        ('.whl', parse_wheel),
        ('.egg', parse_egg_binary),
        ('.tar.gz', parse_source_distribution),
        ('.zip', parse_source_distribution),
    )
    for suffix, parser in parsers_by_suffix:
        if file_name.endswith(suffix):
            return parser(location)

    # No parser handles this file name.
    return None


def parse_source_distribution(location):
Expand Down Expand Up @@ -361,51 +380,43 @@ def parse_unpackaged_source(location):
except ValueError:
pass

if unpackaged_dist and unpackaged_dist.name:
common_data = dict(
name=unpackaged_dist.name,
version=unpackaged_dist.version,
)
package = PythonPackage(**common_data)
return package
return parse_with_pkginfo(unpackaged_dist)


def parse_egg_binary(location):
    """
    Return the package that parse_with_pkginfo builds from the pkginfo BDist
    metadata of the binary distribution (such as an .egg file) at `location`.
    """
    binary_dist = BDist(location)
    # Delegate to the shared helper so that eggs get the same fields as the
    # other distribution types instead of only a name and a version.
    return parse_with_pkginfo(binary_dist)


def parse_wheel(location):
    """
    Return the package that parse_with_pkginfo builds from the pkginfo Wheel
    metadata of the wheel file at `location`, such as a wheel generated via
    `setup.py bdist_wheel`.
    """
    wheel = Wheel(location)
    # parse_with_pkginfo already checks for missing metadata and for a
    # missing name, so no extra guard is needed here.
    return parse_with_pkginfo(wheel)


def parse_with_pkginfo(object):
    """
    Return a PythonPackage built from the pkginfo metadata `object`, or None
    if `object` is falsy or has no name.

    The name, version, description, download_url and home_page attributes
    are mapped to package fields. A maintainer, when present, is recorded as
    a party and a license, when present, as the declared license.
    """
    # NOTE: the `object` parameter shadows the builtin of the same name. It
    # is kept as-is so that existing keyword callers keep working, and it is
    # aliased here to keep the body readable.
    metadata = object
    if not metadata or not metadata.name:
        return None

    common_data = dict(
        name=metadata.name,
        version=metadata.version,
        description=metadata.description,
        download_url=metadata.download_url,
        homepage_url=metadata.home_page,
    )

    # Collect the parties BEFORE building the package: keys added to
    # common_data after PythonPackage(**common_data) has run are never seen
    # by the package.
    if metadata.maintainer:
        # NOTE(review): the maintainer is recorded with role='author' --
        # confirm whether a dedicated maintainer role is intended.
        common_data['parties'] = [
            models.Party(
                type=models.party_person,
                name=metadata.maintainer,
                role='author',
                email=metadata.maintainer_email,
            ),
        ]

    package = PythonPackage(**common_data)

    if metadata.license:
        # TODO: We should make the declared license as it is, this should be
        # updated in scancode to parse a pure string
        package.declared_license = {'license': metadata.license}

    return package


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"release_date": null,
"parties": [],
"keywords": [],
"homepage_url": null,
"homepage_url": "http://nexb.com",
"download_url": null,
"size": null,
"sha1": null,
Expand All @@ -21,8 +21,10 @@
"code_view_url": null,
"vcs_url": null,
"copyright": null,
"license_expression": null,
"declared_license": null,
"license_expression": "unknown",
"declared_license": {
"license": "BSD"
},
"notice_text": null,
"manifest_path": null,
"dependencies": [],
Expand Down
Binary file not shown.
37 changes: 37 additions & 0 deletions tests/packagedcode/data/pypi/wheel/parse-wheel-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"type": "pypi",
"namespace": null,
"name": "atomicwrites",
"version": "1.2.1",
"qualifiers": null,
"subpath": null,
"primary_language": "Python",
"description": "===================\npython-atomicwrites\n===================\n\n.. image:: https://travis-ci.org/untitaker/python-atomicwrites.svg?branch=master\n :target: https://travis-ci.org/untitaker/python-atomicwrites\n\n.. image:: https://ci.appveyor.com/api/projects/status/vadc4le3c27to59x/branch/master?svg=true\n :target: https://ci.appveyor.com/project/untitaker/python-atomicwrites/branch/master\n\nAtomic file writes.\n\n.. code-block:: python\n\n from atomicwrites import atomic_write\n\n with atomic_write('foo.txt', overwrite=True) as f:\n f.write('Hello world.')\n # \"foo.txt\" doesn't exist yet.\n\n # Now it does.\n\n\nFeatures that distinguish it from other similar libraries (see `Alternatives and Credit`_):\n\n- Race-free assertion that the target file doesn't yet exist. This can be\n controlled with the ``overwrite`` parameter.\n\n- Windows support, although not well-tested. The MSDN resources are not very\n explicit about which operations are atomic. I'm basing my assumptions off `a\n comment\n <https://social.msdn.microsoft.com/Forums/windowsdesktop/en-US/449bb49d-8acc-48dc-a46f-0760ceddbfc3/movefileexmovefilereplaceexisting-ntfs-same-volume-atomic?forum=windowssdk#a239bc26-eaf0-4920-9f21-440bd2be9cc8>`_\n by `Doug Crook\n <https://social.msdn.microsoft.com/Profile/doug%20e.%20cook>`_, who appears\n to be a Microsoft employee:\n\n FAQ: Is MoveFileEx atomic\n Frequently asked question: Is MoveFileEx atomic if the existing and new\n files are both on the same drive?\n\n The simple answer is \"usually, but in some cases it will silently fall-back\n to a non-atomic method, so don't count on it\".\n\n The implementation of MoveFileEx looks something like this: [...]\n\n The problem is if the rename fails, you might end up with a CopyFile, which\n is definitely not atomic.\n\n If you really need atomic-or-nothing, you can try calling\n NtSetInformationFile, which is unsupported but is much more likely to be\n atomic. 
\n\n- Simple high-level API that wraps a very flexible class-based API.\n\n- Consistent error handling across platforms.\n\n\nHow it works\n============\n\nIt uses a temporary file in the same directory as the given path. This ensures\nthat the temporary file resides on the same filesystem.\n\nThe temporary file will then be atomically moved to the target location: On\nPOSIX, it will use ``rename`` if files should be overwritten, otherwise a\ncombination of ``link`` and ``unlink``. On Windows, it uses MoveFileEx_ through\nstdlib's ``ctypes`` with the appropriate flags.\n\nNote that with ``link`` and ``unlink``, there's a timewindow where the file\nmight be available under two entries in the filesystem: The name of the\ntemporary file, and the name of the target file.\n\nAlso note that the permissions of the target file may change this way. In some\nsituations a ``chmod`` can be issued without any concurrency problems, but\nsince that is not always the case, this library doesn't do it by itself.\n\n.. _MoveFileEx: https://msdn.microsoft.com/en-us/library/windows/desktop/aa365240%28v=vs.85%29.aspx\n\nfsync\n-----\n\nOn POSIX, ``fsync`` is invoked on the temporary file after it is written (to\nflush file content and metadata), and on the parent directory after the file is\nmoved (to flush filename).\n\n``fsync`` does not take care of disks' internal buffers, but there don't seem\nto be any standard POSIX APIs for that. 
On OS X, ``fcntl`` is used with\n``F_FULLFSYNC`` instead of ``fsync`` for that reason.\n\nOn Windows, `_commit <https://msdn.microsoft.com/en-us/library/17618685.aspx>`_\nis used, but there are no guarantees about disk internal buffers.\n\nAlternatives and Credit\n=======================\n\nAtomicwrites is directly inspired by the following libraries (and shares a\nminimal amount of code):\n\n- The Trac project's `utility functions\n <http://www.edgewall.org/docs/tags-trac-0.11.7/epydoc/trac.util-pysrc.html>`_,\n also used in `Werkzeug <http://werkzeug.pocoo.org/>`_ and\n `mitsuhiko/python-atomicfile\n <https://github.com/mitsuhiko/python-atomicfile>`_. The idea to use\n ``ctypes`` instead of ``PyWin32`` originated there.\n\n- `abarnert/fatomic <https://github.com/abarnert/fatomic>`_. Windows support\n (based on ``PyWin32``) was originally taken from there.\n\nOther alternatives to atomicwrites include:\n\n- `sashka/atomicfile <https://github.com/sashka/atomicfile>`_. Originally I\n considered using that, but at the time it was lacking a lot of features I\n needed (Windows support, overwrite-parameter, overriding behavior through\n subclassing).\n\n- The `Boltons library collection <https://github.com/mahmoud/boltons>`_\n features a class for atomic file writes, which seems to have a very similar\n ``overwrite`` parameter. It is lacking Windows support though.\n\nLicense\n=======\n\nLicensed under the MIT, see ``LICENSE``.\n\n\n",
"release_date": null,
"parties": [],
"keywords": [],
"homepage_url": "https://github.com/untitaker/python-atomicwrites",
"download_url": null,
"size": null,
"sha1": null,
"md5": null,
"sha256": null,
"sha512": null,
"bug_tracking_url": null,
"code_view_url": null,
"vcs_url": null,
"copyright": null,
"license_expression": "mit",
"declared_license": {
"license": "MIT"
},
"notice_text": null,
"manifest_path": null,
"dependencies": [],
"contains_source_code": null,
"source_packages": [],
"purl": "pkg:pypi/[email protected]",
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": null
}
6 changes: 6 additions & 0 deletions tests/packagedcode/test_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,9 @@ def test_pkginfo_parse_with_unpackaged_source(self):
package = pypi.parse_unpackaged_source(test_file)
expected_loc = self.get_test_loc('pypi/unpackage_source_parser-expected.json')
self.check_package(package, expected_loc, regen=False)

def test_pkginfo_parse_with_wheelfile(self):
    # Parse the atomicwrites test wheel and compare the resulting package
    # with the stored expected JSON (without regenerating it).
    wheel_loc = self.get_test_loc('pypi/wheel/atomicwrites-1.2.1-py2.py3-none-any.whl')
    parsed_package = pypi.parse_wheel(wheel_loc)
    expected_json_loc = self.get_test_loc('pypi/wheel/parse-wheel-expected.json')
    self.check_package(parsed_package, expected_json_loc, regen=False)

0 comments on commit d7ebe5a

Please sign in to comment.