Verify dependencies before uploading a package #1

Merged · 13 commits · Feb 1, 2021
Changes from 11 commits
4 changes: 2 additions & 2 deletions .github/workflows/check_scripts.yml
@@ -13,7 +13,7 @@ jobs:
- uses: actions/checkout@v2
- run: |
pip install mypy
mypy -p scripts
mypy -p scripts -p tests

tests:
name: Run integration and unit tests
@@ -36,4 +36,4 @@ jobs:
run: |
pip install pytest toml requests setuptools wheel
cd main
python -m pytest tests/*
python -m pytest tests
2 changes: 1 addition & 1 deletion .github/workflows/force_update.yml
@@ -34,4 +34,4 @@ jobs:
TWINE_PASSWORD: ${{ secrets.TYPESHED_BOT_API_TOKEN }}
run: |
cd main
python -m scripts.upload_some ../typeshed ${{ github.event.inputs.distribution }}
python -m scripts.upload_some ../typeshed ${{ github.event.inputs.distribution }} data/uploaded_packages.txt
2 changes: 1 addition & 1 deletion .github/workflows/update_stubs.yml
@@ -34,7 +34,7 @@ jobs:
TWINE_PASSWORD: ${{ secrets.TYPESHED_BOT_API_TOKEN }}
run: |
cd main
python -m scripts.upload_changed ../typeshed $(cat data/last_typeshed_commit.sha1) --dry-run
python -m scripts.upload_changed ../typeshed $(cat data/last_typeshed_commit.sha1) data/uploaded_packages.txt --dry-run
(cd ../typeshed; git rev-parse HEAD) > data/last_typeshed_commit.sha1
if [ -z "$(git status --porcelain)" ]; then
exit 0;
9 changes: 9 additions & 0 deletions data/uploaded_packages.txt
@@ -0,0 +1,9 @@
types-Routes
types-attrs
types-enum34
types-first
types-mypy-extensions
types-six
types-toml
types-typed-ast
types-typing-extensions
93 changes: 85 additions & 8 deletions scripts/build_wheel.py
@@ -12,6 +12,8 @@
The types stubs live in https://github.com/python/typeshed/tree/master/stubs,
all fixes for types and metadata should be contributed there, see
https://github.com/python/typeshed/blob/master/CONTRIBUTING.md for more details.

This file also contains some helper functions related to wheel validation and upload.
"""

import argparse
@@ -20,8 +22,12 @@
import shutil
import tempfile
import subprocess
from collections import defaultdict
Review comment: Nit: two spaces after 'from'

from functools import cmp_to_key
from textwrap import dedent
from typing import List, Dict, Any, Tuple
from typing import List, Dict, Any, Tuple, Set

from scripts import get_version

import toml

@@ -165,6 +171,83 @@ def collect_setup_entries(
return packages, package_data


def verify_dependency(typeshed_dir: str, dependency: str, uploaded: str) -> None:
"""Verify this is a valid dependency, i.e. a stub package uploaded by us."""
known_distributions = set(os.listdir(os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE)))
dependency = get_version.strip_dep_version(dependency)
assert dependency.startswith("types-"), "Currently only dependencies on stub packages are supported"
assert dependency[len("types-"):] in known_distributions, "Only dependencies on typeshed stubs are allowed"
with open(uploaded) as f:
uploaded_distributions = set(f.read().splitlines())

msg = f"{dependency} looks like a foreign distribution."
uploaded_distributions_lower = [d.lower() for d in uploaded_distributions]
if dependency not in uploaded_distributions and dependency.lower() in uploaded_distributions_lower:
msg += " Note: list is case sensitive"
assert dependency in uploaded_distributions, msg


def update_uploaded(uploaded: str, distribution: str) -> None:
with open(uploaded) as f:
current = set(f.read().splitlines())
if f"types-{distribution}" not in current:
with open(uploaded, "w") as f:
f.writelines(sorted(current | {f"types-{distribution}"}))


def make_dependency_map(typeshed_dir: str, distributions: List[str]) -> Dict[str, Set[str]]:
"""Return relative dependency map among distributions.

Important: this only includes dependencies *within* the given
list of distributions.
"""
result: Dict[str, Set[str]] = {d: set() for d in distributions}
for distribution in distributions:
data = read_matadata(
os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE, distribution, META)
)
for dependency in data.get("requires", []):
dependency = get_version.strip_dep_version(dependency)
assert dependency.startswith("types-"), "Currently only dependencies on stub packages are supported"
if dependency[len("types-"):] in distributions:
Review comment: Could be worth adding a helper that does s[len("types-"):], since we do it several times. (A sketch of such a helper follows this function.)

result[distribution].add(dependency[len("types-"):])
return result
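
A minimal sketch of the helper suggested in the review comment above (the name strip_types_prefix is hypothetical, not part of this PR) could be:

def strip_types_prefix(dependency: str) -> str:
    """Strip the "types-" prefix from a stub distribution name."""
    assert dependency.startswith("types-"), "Not a stub distribution name"
    return dependency[len("types-"):]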


def transitive_deps(dep_map: Dict[str, Set[str]]) -> Dict[str, Set[str]]:
"""Propagate dependencies to compute a transitive dependency map.

Note: this algorithm is O(N**2) in the general case, but we don't worry,
because N is small (less than 1000). So it will take a few seconds at worst,
while building/uploading 1000 packages will take minutes.
"""
transitive: Dict[str, Set[str]] = defaultdict(set)
for distribution in dep_map:
to_add = {distribution}
while to_add:
new = to_add.pop()
extra = dep_map[new]
transitive[distribution] |= extra
assert distribution not in transitive[distribution], f"Cyclic dependency {distribution} -> {distribution}"
to_add |= extra
return transitive
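
For illustration only (the distribution names here are made up), transitive_deps propagates indirect dependencies like this:

from scripts import build_wheel  # assumes this repository's layout

dep_map = {"spam": {"eggs"}, "eggs": {"ham"}, "ham": set()}
print(build_wheel.transitive_deps(dep_map))
# {"spam": {"eggs", "ham"}, "eggs": {"ham"}, "ham": set()}  (returned as a defaultdict)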


def sort_by_dependency(dep_map: Dict[str, Set[str]]) -> List[str]:
"""Sort distributions by dependency order (those depending on nothing appear first)."""
trans_map = transitive_deps(dep_map)

def compare(d1: str, d2: str) -> int:
if d1 in trans_map[d2]:
return -1
if d2 in trans_map[d1]:
return 1
return 0

# Return independent packages sorted by name for stability.
return sorted(sorted(dep_map), key=cmp_to_key(compare))
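
Continuing the same made-up example, sort_by_dependency then yields an upload order in which dependencies come before the packages that need them:

from scripts import build_wheel  # assumes this repository's layout

dep_map = {"spam": {"eggs"}, "eggs": {"ham"}, "ham": set()}
print(build_wheel.sort_by_dependency(dep_map))
# ['ham', 'eggs', 'spam'] -- "ham" depends on nothing, so it would be uploaded first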


def generate_setup_file(typeshed_dir: str, distribution: str, increment: str) -> str:
"""Auto-generate a setup.py file for given distribution using a template."""
base_dir = os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE, distribution)
@@ -178,17 +261,11 @@ def generate_setup_file(typeshed_dir: str, distribution: str, increment: str) ->
packages += py2_packages
package_data.update(py2_package_data)
version = metadata["version"]
requires = metadata.get("requires", [])
known_distributions = set(os.listdir(os.path.join(typeshed_dir, THIRD_PARTY_NAMESPACE)))
for dependency in requires:
assert dependency.startswith("types-"), "Only dependencies on stub packages are allowed"
dep_name = dependency[len("types-"):]
assert dep_name in known_distributions, "Only dependencies on typeshed stubs are allowed"
assert version.count(".") == 1, f"Version must be major.minor, not {version}"
return SETUP_TEMPLATE.format(
distribution=distribution,
version=f"{version}.{increment}",
requires=requires,
requires=metadata.get("requires", []),
packages=packages,
package_data=package_data,
)
27 changes: 27 additions & 0 deletions scripts/get_version.py
@@ -5,6 +5,9 @@
If the given version was never uploaded, this will return -1. See
https://github.com/python/typeshed/blob/master/README.md for details
on stub versioning.

This file also contains some helper functions related to querying
distribution information.
"""

import argparse
@@ -34,6 +37,30 @@ def read_base_version(typeshed_dir: str, distribution: str) -> str:
return data["version"]


def strip_dep_version(dependency: str) -> str:
"""Strip a possible version suffix, e.g. types-six>=0.1.4 -> types-six."""
Review comment: What if there is a space before >=? There could also be a semicolon. This example is from test-requirements.txt for mypy: flake8-bugbear; python_version >= '3.5'

Author reply: There should not be semicolons, requires is a list already.

Author reply: Sorry, I didn't notice what is in the second part; I think we may not support this initially. (A hedged sketch of a more tolerant variant appears after the function below.)

dep_version_pos = len(dependency)
for pos, c in enumerate(dependency):
if c in "=<>":
dep_version_pos = pos
break
return dependency[:dep_version_pos]
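
Relating to the review thread above: if whitespace before the comparison operator ever needed to be tolerated, a regex-based variant could look like this sketch (hypothetical, not part of this PR; it still does not handle environment markers after a semicolon):

import re

def strip_dep_version_tolerant(dependency: str) -> str:
    """Like strip_dep_version, but also tolerates spaces before =, < or >."""
    return re.split(r"\s*[=<>]", dependency, maxsplit=1)[0].rstrip()

# strip_dep_version_tolerant("types-six >= 0.1.4") == "types-six"
# strip_dep_version_tolerant("types-six>=0.1.4") == "types-six"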


def check_exists(distribution: str) -> bool:
"""Check if any version of this *stub* distribution has ben ever uploaded."""
url = URL_TEMPLATE.format(distribution)
retry_strategy = Retry(total=RETRIES, status_forcelist=RETRY_ON)
with requests.Session() as session:
session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
resp = session.get(url, timeout=TIMEOUT)
if resp.ok:
return True
if resp.status_code == 404:
return False
raise ValueError("Error while verifying existence")


def main(typeshed_dir: str, distribution: str, version: Optional[str]) -> int:
"""A simple function to get version increment of a third-party stub package.

21 changes: 17 additions & 4 deletions scripts/upload_changed.py
@@ -1,11 +1,12 @@
"""
Entry point for scheduled GitHub auto-upload action.

This does three things:
This does the following:
* Reads the list of stub packages modified since last commit in typeshed
* Checks what is the current stub version increment for each package on PyPI
* Bumps the increment, builds and uploads (unless run with --dry-run) each
new package to PyPI
* Verifies validity of stub dependencies, and updates known dependencies if needed
"""

import argparse
@@ -17,24 +18,36 @@
from scripts import get_changed


def main(typeshed_dir: str, commit: str, dry_run: bool = False) -> None:
def main(typeshed_dir: str, commit: str, uploaded: str, dry_run: bool = False) -> None:
"""Upload stub typeshed packages modified since commit."""
changed = get_changed.main(typeshed_dir, commit)
for distribution in changed:
# Sort by dependency to prevent depending on foreign distributions.
to_upload = build_wheel.sort_by_dependency(
build_wheel.make_dependency_map(typeshed_dir, changed)
)
for distribution in to_upload:
# Setting base version to None, so it will be read from current METADATA.toml.
increment = get_version.main(typeshed_dir, distribution, None)
increment += 1
temp_dir = build_wheel.main(typeshed_dir, distribution, increment)
if dry_run:
print(f"Would upload: {distribution}, increment {increment}")
continue
for dependency in build_wheel.read_matadata(
Review comment: Share this code? It seems repeated.

Author reply: Yeah, I tried, but because of one line in the middle that is different it doesn't look worth it.

os.path.join(typeshed_dir, build_wheel.THIRD_PARTY_NAMESPACE, distribution)
).get("requires", []):
if get_version.check_exists(get_version.strip_dep_version(dependency)):
# If this dependency is already present, check it was uploaded by us.
build_wheel.verify_dependency(typeshed_dir, dependency, uploaded)
Review comment: Should we always check that the dependency is on PyPI? Otherwise it may be missing, and somebody could upload a bad distribution afterwards?

Author reply: Now that I sort by dependency order it should be OK.

Review comment: What is the check_exists check for? Can we verify the dependency unconditionally? If a dependency doesn't exist, we should reject the dependency, since somebody could later upload some bad stuff, I think.

subprocess.run(["twine", "upload", os.path.join(temp_dir, "*")], check=True)
build_wheel.update_uploaded(uploaded, distribution)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("typeshed_dir", help="Path to typeshed checkout directory")
parser.add_argument("previous_commit", help="Previous typeshed commit for which we performed upload")
parser.add_argument("uploaded", help="Previously uploaded packages to validate dependencies")
Review comment: This is a path to a file, not the name of packages? Make the help text more explicit.

parser.add_argument("--dry-run", action="store_true", help="Should we perform a dry run (don't actually upload)")
args = parser.parse_args()
main(args.typeshed_dir, args.previous_commit, args.dry_run)
main(args.typeshed_dir, args.previous_commit, args.uploaded, args.dry_run)
28 changes: 21 additions & 7 deletions scripts/upload_some.py
@@ -1,10 +1,11 @@
"""
Entry point for manual GitHub upload action.

This does three things:
This does the following:
* Finds all distributions with names that match the pattern provided
* Checks what is the current stub version increment for each package on PyPI
* Bumps the increment, builds and uploads each new package to PyPI
* Verifies validity of stub dependencies, and updates known dependencies if needed
"""

import argparse
@@ -16,22 +17,35 @@
from scripts import build_wheel


def main(typeshed_dir: str, pattern: str) -> None:
def main(typeshed_dir: str, pattern: str, uploaded: str) -> None:
"""Force upload typeshed stub packages to PyPI."""
compiled = re.compile(f"^{pattern}$") # force exact matches
for distribution in os.listdir(os.path.join(typeshed_dir, "stubs")):
if not re.match(compiled, distribution):
continue
matching = [
d for d in os.listdir(os.path.join(typeshed_dir, "stubs")) if re.match(compiled, d)
]
# Sort by dependency to prevent depending on foreign distributions.
to_upload = build_wheel.sort_by_dependency(
build_wheel.make_dependency_map(typeshed_dir, matching)
)
for distribution in to_upload:
# Setting base version to None, so it will be read from current METADATA.toml.
increment = get_version.main(typeshed_dir, distribution, version=None)
increment += 1
for dependency in build_wheel.read_matadata(
Review comment: Here's the other copy which looks the same. (A hedged sketch of a possible shared helper follows this file's diff.)

os.path.join(typeshed_dir, build_wheel.THIRD_PARTY_NAMESPACE, distribution)
).get("requires", []):
if get_version.check_exists(get_version.strip_dep_version(dependency)):
# If this dependency is already present, check it was uploaded by us.
build_wheel.verify_dependency(typeshed_dir, dependency, uploaded)
temp_dir = build_wheel.main(typeshed_dir, distribution, increment)
subprocess.run(["twine", "upload", os.path.join(temp_dir, "*")], check=True)
build_wheel.update_uploaded(uploaded, distribution)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("typeshed_dir", help="Path to typeshed checkout directory")
parser.add_argument("pattern", help="Pattern to select distributions for upload")
parser.add_argument("pattern", help="Regular expression to select distributions for upload")
parser.add_argument("uploaded", help="Previously uploaded packages to validate dependencies")
args = parser.parse_args()
main(args.typeshed_dir, args.pattern)
main(args.typeshed_dir, args.pattern, args.uploaded)
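
As noted in the review comments, the dependency-verification loop is duplicated between upload_changed.py and upload_some.py. A minimal sketch of a shared helper (hypothetical, not part of this PR) could be:

import os
from scripts import build_wheel, get_version

def verify_typeshed_dependencies(typeshed_dir: str, distribution: str, uploaded: str) -> None:
    """Check that every dependency of a distribution is a stub package uploaded by us."""
    metadata = build_wheel.read_matadata(
        os.path.join(typeshed_dir, build_wheel.THIRD_PARTY_NAMESPACE, distribution)
    )
    for dependency in metadata.get("requires", []):
        if get_version.check_exists(get_version.strip_dep_version(dependency)):
            # If this dependency is already on PyPI, check it was uploaded by us.
            build_wheel.verify_dependency(typeshed_dir, dependency, uploaded)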
Empty file added tests/__init__.py
20 changes: 20 additions & 0 deletions tests/integration.py → tests/test_integration.py
@@ -4,9 +4,11 @@
a typeshed checkout side by side.
"""
import os
import pytest # type: ignore[import]
from scripts import get_version, build_wheel

TYPESHED = "../typeshed"
UPLOADED = "data/uploaded_packages.txt"


def test_version() -> None:
@@ -17,9 +19,27 @@ def test_version() -> None:
assert get_version.main(TYPESHED, "typing-extensions", None) >= 0


def test_check_exists() -> None:
assert get_version.check_exists("six")
assert not get_version.check_exists("nonexistent-distribution")


def test_build_wheel() -> None:
# Check that we can build wheels for all distributions.
for distribution in os.listdir(os.path.join(TYPESHED, "stubs")):
tmp_dir = build_wheel.main(TYPESHED, distribution, increment=1)
assert tmp_dir.endswith("/dist")
assert list(os.listdir(tmp_dir)) # check it is not empty


def test_verify_dependency() -> None:
# Check that some known dependencies verify as valid.
build_wheel.verify_dependency(TYPESHED, "types-six", UPLOADED)
build_wheel.verify_dependency(TYPESHED, "types-six==0.1.1", UPLOADED)
build_wheel.verify_dependency(TYPESHED, "types-typing-extensions", UPLOADED)
build_wheel.verify_dependency(TYPESHED, "types-typing-extensions>=3.7", UPLOADED)
# Also check a couple of error cases.
with pytest.raises(AssertionError):
build_wheel.verify_dependency(TYPESHED, "unsupported", UPLOADED)
with pytest.raises(AssertionError):
build_wheel.verify_dependency(TYPESHED, "types-unknown-xxx", UPLOADED)