From bffc81d8b1ef426fe8d34ae46c9c9b900e5d18d6 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Fri, 29 Jul 2022 14:47:21 -0400 Subject: [PATCH 001/142] Add initial cibuildwheel workflow --- .github/workflows/wheels.yml | 99 ++ .gitignore | 4 + cpp/cmake/thirdparty/get_arrow.cmake | 5 + cpp/libcudf/__init__.py | 0 cpp/libcudf/_version.py | 567 ++++++++ cpp/pyproject.toml | 24 + cpp/setup.cfg | 15 + cpp/setup.py | 64 + cpp/versioneer.py | 1904 ++++++++++++++++++++++++++ python/cudf/pyproject.toml | 2 + python/cudf/setup.py | 18 +- python/dask_cudf/setup.py | 18 +- 12 files changed, 2707 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/wheels.yml create mode 100644 cpp/libcudf/__init__.py create mode 100644 cpp/libcudf/_version.py create mode 100644 cpp/pyproject.toml create mode 100644 cpp/setup.cfg create mode 100644 cpp/setup.py create mode 100644 cpp/versioneer.py diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 00000000000..9825dbeaadf --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,99 @@ +name: cuDF wheels + +on: + push: + branches: + - 'pull-request/[0-9]+' + +jobs: + libcudf-wheel-amd64: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions + with: + package-name: libcudf_cuda11 + package-dir: cpp + python-version: "3.8" + cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: '-DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON 
-DCUDF_ENABLE_ARROW_PARQUET=ON' + gpu-smoketest: "" + auditwheel-repair-override: "cp {wheel} {dest_dir}" + secrets: inherit + cudf-wheel-amd64-38: + needs: libcudf-wheel-amd64 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions + with: + package-name: cudf + package-dir: python/cudf + python-version: "3.8" + cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + secrets: inherit + dask_cudf-wheel: + needs: cudf-wheel-amd64-38 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions + with: + package-name: dask_cudf + package-dir: python/dask_cudf + gpu-smoketest: "import dask_cudf; print(dask_cudf)" + gpu-smoketest-before: "pip install rmm-cuda11 cudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + secrets: inherit + 
cudf-wheel-amd64-39: + needs: libcudf-wheel-amd64 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions + with: + package-name: cudf + package-dir: python/cudf + python-version: "3.9" + cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + secrets: inherit + libcudf-wheel-arm64: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions + with: + package-name: libcudf_cuda11 + package-dir: cpp + python-version: "3.8" + cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C 
/opt/_internal" + cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: '-DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + gpu-smoketest: "" + auditwheel-repair-override: "cp {wheel} {dest_dir}" + secrets: inherit + cudf-wheel-arm64-38: + needs: libcudf-wheel-arm64 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions + with: + package-name: cudf + package-dir: python/cudf + python-version: "3.8" + cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + secrets: inherit + cudf-wheel-arm64-39: + needs: libcudf-wheel-arm64 + uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions + with: + package-name: cudf + package-dir: python/cudf + python-version: "3.9" + cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + secrets: inherit diff --git a/.gitignore b/.gitignore index 0d63c76bf9f..bc8a1697d10 100644 --- a/.gitignore +++ b/.gitignore @@ -166,3 +166,7 @@ dask-worker-space/ # Sphinx docs & build artifacts docs/cudf/source/api_docs/generated/* docs/cudf/source/api_docs/api/* + +# cibuildwheel +/wheelhouse +/dist diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 9fa5b9d1658..3a008db45d7 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -312,6 +312,11 @@ if(NOT DEFINED CUDF_VERSION_Arrow) ) endif() +if(SKBUILD) + 
message(VERBOSE "calling find_package(Python3) with correct arguments to help arrow find it later on") + find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED) +endif() + find_and_configure_arrow( ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} diff --git a/cpp/libcudf/__init__.py b/cpp/libcudf/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cpp/libcudf/_version.py b/cpp/libcudf/_version.py new file mode 100644 index 00000000000..c265a22b162 --- /dev/null +++ b/cpp/libcudf/_version.py @@ -0,0 +1,567 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.18 (https://github.com/warner/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "libcudf-" + cfg.versionfile_source = "libcudf/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command( + commands, args, cwd=None, verbose=False, hide_stderr=False, env=None +): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" 
% dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r"\d", r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command( + GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True + ) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ( + "unable to parse git-describe output: '%s'" % describe_out + ) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command( + GITS, ["rev-list", "HEAD", "--count"], cwd=root + ) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Eexceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords( + get_keywords(), cfg.tag_prefix, verbose + ) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. 
Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/cpp/pyproject.toml b/cpp/pyproject.toml new file mode 100644 index 00000000000..12777bb77bc --- /dev/null +++ b/cpp/pyproject.toml @@ -0,0 +1,24 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] + +requires = [ + "wheel", + "setuptools", + "scikit-build>=0.13.1", + "cmake>=3.20.1,!=3.23.0", + "ninja", + "numpy" +] diff --git a/cpp/setup.cfg b/cpp/setup.cfg new file mode 100644 index 00000000000..ea1c7dab305 --- /dev/null +++ b/cpp/setup.cfg @@ -0,0 +1,15 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + +[flake8] +exclude = versioneer.py +# See the docstring in versioneer.py for instructions. 
Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +VCS = git +style = pep440 +versionfile_source = libcudf/_version.py +versionfile_build = libcudf/_version.py +tag_prefix = v +parentdir_prefix = libcudf- diff --git a/cpp/setup.py b/cpp/setup.py new file mode 100644 index 00000000000..ef60e9833de --- /dev/null +++ b/cpp/setup.py @@ -0,0 +1,64 @@ +# +# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from setuptools import find_packages +from skbuild import setup +from wheel.bdist_wheel import bdist_wheel as _bdist_wheel + +import versioneer +import os + + +''' +copy this trick from https://github.com/ssciwr/clang-format-wheel/blob/main/setup.py +since the C++ code compiled by this cpp module is not a Python C extension +override the platform to be py3-none +''' +class genericpy_bdist_wheel(_bdist_wheel): + def finalize_options(self): + _bdist_wheel.finalize_options(self) + self.root_is_pure = False + + def get_tag(self): + python, abi, plat = _bdist_wheel.get_tag(self) + python, abi = "py3", "none" + return python, abi, plat + + +cmdclass = versioneer.get_cmdclass() +cmdclass['bdist_wheel'] = genericpy_bdist_wheel + + +def exclude_libcxx_symlink(cmake_manifest): + return list(filter(lambda name: not ('include/rapids/libcxx/include' in name), cmake_manifest)) + + +setup(name='libcudf'+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), + description="cuDF C++ library", + version=versioneer.get_version(), + classifiers=[ + "Intended Audience :: Developers", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9" + ], + author="NVIDIA Corporation", + cmake_process_manifest_hook=exclude_libcxx_symlink, + packages=find_packages(include=['libcudf']), + license="Apache", + cmdclass=cmdclass, + zip_safe=False + ) diff --git a/cpp/versioneer.py b/cpp/versioneer.py new file mode 100644 index 00000000000..a6537a34ede --- /dev/null +++ b/cpp/versioneer.py @@ -0,0 +1,1904 @@ +# Version: 0.18 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! 
+* https://github.com/warner/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy +* [![Latest Version] +(https://pypip.in/version/versioneer/badge.svg?style=flat) +](https://pypi.python.org/pypi/versioneer/) +* [![Build Status] +(https://travis-ci.org/warner/python-versioneer.png?branch=master) +](https://travis-ci.org/warner/python-versioneer) + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere to your $PATH +* add a `[versioneer]` section to your setup.cfg (see below) +* run `versioneer install` in your source tree, commit the results + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. 
"myproject-1.2" instead of just "1.2"), in which case the tool
+needs to strip the tag prefix to extract the version identifier. For
+unreleased software (between tags), the version identifier should provide
+enough information to help developers recreate the same tree, while also
+giving them an idea of roughly how old the tree is (after version 1.2, before
+version 1.3). Many VCS systems can report a description that captures this,
+for example `git describe --tags --dirty --always` reports things like
+"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
+0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
+uncommitted changes).
+
+The version identifier is used for multiple purposes:
+
+* to allow the module to self-identify its version: `myproject.__version__`
+* to choose a name and prefix for a 'setup.py sdist' tarball
+
+## Theory of Operation
+
+Versioneer works by adding a special `_version.py` file into your source
+tree, where your `__init__.py` can import it. This `_version.py` knows how to
+dynamically ask the VCS tool for version information at import time.
+
+`_version.py` also contains `$Revision$` markers, and the installation
+process marks `_version.py` to have this marker rewritten with a tag name
+during the `git archive` command. As a result, generated tarballs will
+contain enough information to get the proper version.
+
+To allow `setup.py` to compute a version too, a `versioneer.py` is added to
+the top level of your source tree, next to `setup.py` and the `setup.cfg`
+that configures it. This overrides several distutils/setuptools commands to
+compute the version when invoked, and changes `setup.py build` and `setup.py
+sdist` to replace `_version.py` with a small static file that contains just
+the generated version data.
+
+## Installation
+
+See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 
+ +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. 
+ +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from cudf._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/warner/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
There are
+two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the
+`setup.cfg`, so subprojects were completely unsupported with those releases.
+
+### Editable installs with setuptools <= 18.5
+
+`setup.py develop` and `pip install --editable .` allow you to install a
+project into a virtualenv once, then continue editing the source code (and
+test) without re-installing after every change. 
+
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
+convenient way to specify executable scripts that should be installed along
+with the python package.
+
+These both work as expected when using modern setuptools. When using
+setuptools-18.5 or earlier, however, certain operations will cause
+`pkg_resources.DistributionNotFound` errors when running the entrypoint
+script, which must be resolved by re-installing the package. This happens
+when the install happens with one version, then the egg_info data is
+regenerated while a different version is checked out. Many setup.py commands
+cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
+a different virtualenv), so this can be surprising.
+
+[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes
+this one, but upgrading to a newer version of setuptools should probably
+resolve it.
+
+### Unicode version strings
+
+While Versioneer works (and is continually tested) with both Python 2 and
+Python 3, it is not entirely consistent with bytes-vs-unicode distinctions.
+Newer releases probably generate unicode version strings on py2. It's not
+clear that this is wrong, but it may be surprising for applications when they
+write these strings to a network connection or include them in bytes-oriented
+APIs like cryptographic checksums.
+
+[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates
+this question.
+
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 
+* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + + +## License + +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the Creative Commons "Public Domain +Dedication" license (CC0-1.0), as described in +https://creativecommons.org/publicdomain/zero/1.0/ . + +""" + +from __future__ import print_function + +import errno +import json +import os +import re +import subprocess +import sys + +try: + import configparser +except ImportError: + import ConfigParser as configparser + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . 
+ """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ( + "Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." + ) + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + me = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(me)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py) + ) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise EnvironmentError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . 
+ setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.SafeConfigParser() + with open(setup_cfg, "r") as f: + parser.readfp(f) + VCS = parser.get("versioneer", "VCS") # mandatory + + def get(parser, name): + if parser.has_option("versioneer", name): + return parser.get("versioneer", name) + return None + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = get(parser, "style") or "" + cfg.versionfile_source = get(parser, "versionfile_source") + cfg.versionfile_build = get(parser, "versionfile_build") + cfg.tag_prefix = get(parser, "tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = get(parser, "parentdir_prefix") + cfg.verbose = get(parser, "verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command( + commands, args, cwd=None, verbose=False, hide_stderr=False, env=None +): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = 
p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, p.returncode + return stdout, p.returncode + + +LONG_VERSION_PY[ + "git" +] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.18 (https://github.com/warner/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = 
stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, p.returncode + return stdout, p.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", "%%s*" %% tag_prefix], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], + cwd=root)[0].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%%d" %% pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if not keywords: + raise NotThisMethod("no keywords at all, weird") + date = keywords.get("date") + if date is not None: + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = set([r.strip() for r in refnames.strip("()").split(",")]) + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r"\d", r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + out, rc = run_command( + GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True + ) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe 
improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ( + "unable to parse git-describe output: '%s'" % describe_out + ) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = run_command( + GITS, ["rev-list", "HEAD", "--count"], cwd=root + ) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. 
+ """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + me = __file__ + if me.endswith(".pyc") or me.endswith(".pyo"): + me = os.path.splitext(me)[0] + ".py" + versioneer_file = os.path.relpath(me) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + f = open(".gitattributes", "r") + for line in f.readlines(): + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + f.close() + except EnvironmentError: + pass + if not present: + f = open(".gitattributes", "a+") + f.write("%s export-subst\n" % versionfile_source) + f.close() + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for i in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + else: + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.18) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+ +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except EnvironmentError: + raise NotThisMethod("unable to read _version.py") + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, + re.M | re.S, + ) + if not mo: + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, + re.M | re.S, + ) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps( + versions, sort_keys=True, indent=1, separators=(",", ": ") + ) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + """TAG[.post.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post.devDISTANCE + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. 
+ """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. + + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + 
print("unable to compute version") + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(): + """Get the custom setuptools/distutils subclasses used by Versioneer.""" + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to it's pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. 
+ # Also see https://github.com/warner/python-versioneer/issues/52 + + cmds = {} + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? + # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? 
+ + # we override different "build_py" commands for both environments + if "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join( + self.build_lib, cfg.versionfile_build + ) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + cmds["build_py"] = cmd_build_py + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if "py2exe" in sys.modules: # py2exe enabled? 
+ try: + from py2exe.distutils_buildexe import py2exe as _py2exe # py3 + except ImportError: + from py2exe.build_exe import py2exe as _py2exe # py2 + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. 
You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +INIT_PY_SNIPPET = """ +from cudf._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + + +def do_setup(): + """Main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except ( + EnvironmentError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: + if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + print( + "Adding sample versioneer config to setup.cfg", file=sys.stderr + ) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as 
f: + old = f.read() + except EnvironmentError: + old = "" + if INIT_PY_SNIPPET not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(INIT_PY_SNIPPET) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except EnvironmentError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . 
This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 52490444dba..cef44dddd03 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -9,4 +9,6 @@ requires = [ "scikit-build>=0.13.1", "cmake>=3.23.1", "ninja", + "numpy", + "pyarrow==8.0.0", ] diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 58286da42fe..2c012afa65b 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -5,6 +5,7 @@ import shutil import subprocess import sys +import platform from distutils.spawn import find_executable import versioneer @@ -23,6 +24,7 @@ "pandas>=1.0,<1.6.0dev0", "protobuf>=3.20.1,<3.21.0a0", "typing_extensions", + "pyarrow==8.0.0", ] extras_require = { @@ -77,11 +79,15 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") cuda_include_dir = os.path.join(CUDA_HOME, "include") -install_requires.append( - "cupy-cuda" - + get_cuda_version_from_header(cuda_include_dir) - + ">=9.5.0,<12.0.0a0" -) + +myplat = platform.machine() + +if myplat == 'x86_64': + install_requires.append( + "cupy-cuda" + + get_cuda_version_from_header(cuda_include_dir) + + ">=9.5.0,<11.0.0a0" + ) class build_ext_and_proto(build_ext): @@ -128,7 +134,7 @@ def run(self): cmdclass["build_ext"] = build_ext_and_proto setup( - name="cudf", + name="cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), version=versioneer.get_version(), description="cuDF - GPU Dataframe", url="https://github.com/rapidsai/cudf", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index a31a058eb55..50fb1d515ce 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ 
-3,12 +3,12 @@ import os import re import shutil +import platform import versioneer from setuptools import find_packages, setup install_requires = [ - "cudf", "dask>=2022.7.1", "distributed>=2022.7.1", "fsspec>=0.6.0", @@ -64,15 +64,19 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") cuda_include_dir = os.path.join(CUDA_HOME, "include") -install_requires.append( - "cupy-cuda" - + get_cuda_version_from_header(cuda_include_dir) - + ">=9.5.0,<12.0.0a0" -) + +myplat = platform.machine() + +if myplat == 'x86_64': + install_requires.append( + "cupy-cuda" + + get_cuda_version_from_header(cuda_include_dir) + + ">=9.5.0,<11.0.0a0" + ) setup( - name="dask-cudf", + name="dask-cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), version=versioneer.get_version(), description="Utilities for Dask and cuDF interactions", url="https://github.com/rapidsai/cudf", From 65e2fe404e839fbf85fc8cec5e1f4149ded88b58 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Fri, 19 Aug 2022 15:19:44 -0400 Subject: [PATCH 002/142] Install cupy for cudf smoke test --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 9825dbeaadf..1e40fb1ba7a 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -80,7 +80,7 @@ jobs: cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" 
skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" secrets: inherit cudf-wheel-arm64-39: @@ -94,6 +94,6 @@ jobs: cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); 
tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" secrets: inherit From 80b5f941c92668688d79650fbed86d3f62526de1 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 24 Aug 2022 16:05:08 -0400 Subject: [PATCH 003/142] Switch to pyarrow==9.0.0 --- python/cudf/pyproject.toml | 2 +- python/cudf/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index cef44dddd03..eac267f2ce4 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -10,5 +10,5 @@ requires = [ "cmake>=3.23.1", "ninja", "numpy", - "pyarrow==8.0.0", + "pyarrow==9.0.0", ] diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 2c012afa65b..d0c213aaf5c 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -24,7 +24,7 @@ "pandas>=1.0,<1.6.0dev0", "protobuf>=3.20.1,<3.21.0a0", "typing_extensions", - "pyarrow==8.0.0", + "pyarrow==9.0.0", ] extras_require = { From b9278abf23dae612d66466b1099504ae46c5f5c2 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 25 Aug 2022 09:51:53 -0400 Subject: [PATCH 004/142] Add pyarrow smoke test --- .github/workflows/wheels.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1e40fb1ba7a..721f84c19a9 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -31,7 +31,7 @@ jobs: cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url 
https://pypi.k8s.rapids.ai/simple" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: needs: cudf-wheel-amd64-38 @@ -39,8 +39,8 @@ jobs: with: package-name: dask_cudf package-dir: python/dask_cudf - gpu-smoketest: "import dask_cudf; print(dask_cudf)" gpu-smoketest-before: "pip install rmm-cuda11 cudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest: "import dask_cudf; print(dask_cudf)" secrets: inherit cudf-wheel-amd64-39: needs: libcudf-wheel-amd64 @@ -54,7 +54,7 @@ jobs: cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata 
{package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit libcudf-wheel-arm64: uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions @@ -81,7 +81,7 @@ jobs: cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata 
{package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit cudf-wheel-arm64-39: needs: libcudf-wheel-arm64 @@ -95,5 +95,5 @@ jobs: cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && 
pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920);" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit From 2f4a1f592c20b445493fc9b67f5caaecc9335a58 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 25 Aug 2022 11:57:54 -0400 Subject: [PATCH 005/142] Unit tests during cibuildwheel --- .github/workflows/wheels.yml | 131 ++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 64 
deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 721f84c19a9..ccc430345b5 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -29,71 +29,74 @@ jobs: cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + cibw-test-requires: "cupy-cuda115" + cibw-test-extras: "test" + cibw-test-command: "pytest -v {package}/cudf/tests" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit - 
dask_cudf-wheel: - needs: cudf-wheel-amd64-38 - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions - with: - package-name: dask_cudf - package-dir: python/dask_cudf - gpu-smoketest-before: "pip install rmm-cuda11 cudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest: "import dask_cudf; print(dask_cudf)" - secrets: inherit - cudf-wheel-amd64-39: - needs: libcudf-wheel-amd64 - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions - with: - package-name: cudf - package-dir: python/cudf - python-version: "3.9" - cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 
'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - secrets: inherit - libcudf-wheel-arm64: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions - with: - package-name: libcudf_cuda11 - package-dir: cpp - python-version: "3.8" - cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: '-DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - gpu-smoketest: "" - auditwheel-repair-override: "cp {wheel} {dest_dir}" - secrets: inherit - cudf-wheel-arm64-38: - needs: libcudf-wheel-arm64 - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions - with: - package-name: cudf - package-dir: python/cudf - python-version: "3.8" - cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 
libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - secrets: inherit - cudf-wheel-arm64-39: - needs: libcudf-wheel-arm64 - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions - with: - package-name: cudf - package-dir: python/cudf - python-version: "3.9" - cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url 
https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - secrets: inherit + #dask_cudf-wheel: + # needs: cudf-wheel-amd64-38 + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions + # with: + # package-name: dask_cudf + # package-dir: python/dask_cudf + # gpu-smoketest-before: "pip install rmm-cuda11 cudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # gpu-smoketest: "import dask_cudf; print(dask_cudf)" + # secrets: inherit + #cudf-wheel-amd64-39: + # needs: libcudf-wheel-amd64 + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions + # with: + # package-name: cudf + # package-dir: python/cudf + # python-version: "3.9" + # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack 
-b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + # gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + # secrets: inherit + #libcudf-wheel-arm64: + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions + # with: + # package-name: libcudf_cuda11 + # package-dir: cpp + # python-version: "3.8" + # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf 
/opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: '-DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + # gpu-smoketest: "" + # auditwheel-repair-override: "cp {wheel} {dest_dir}" + # secrets: inherit + #cudf-wheel-arm64-38: + # needs: libcudf-wheel-arm64 + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions + # with: + # package-name: cudf + # package-dir: python/cudf + # python-version: "3.8" + # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + # gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert 
numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + # secrets: inherit + #cudf-wheel-arm64-39: + # needs: libcudf-wheel-arm64 + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions + # with: + # package-name: cudf + # package-dir: python/cudf + # python-version: "3.9" + # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + # gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert 
numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + # secrets: inherit From abb25ec2a327b3a5c1078bb9efff5c2be1f035dd Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 29 Aug 2022 10:30:56 -0400 Subject: [PATCH 006/142] DRY wheel workflow with unit tests --- .github/workflows/wheels.yml | 103 ++++++++--------------------------- 1 file changed, 24 insertions(+), 79 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index ccc430345b5..92754b6c613 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,97 +6,42 @@ on: - 'pull-request/[0-9]+' jobs: - libcudf-wheel-amd64: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions + libcudf-wheel: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions with: package-name: libcudf_cuda11 package-dir: cpp python-version: "3.8" - cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: '-DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - 
gpu-smoketest: "" - auditwheel-repair-override: "cp {wheel} {dest_dir}" + skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + auditwheel-skip-repair: "true" + cibw-test-command: "find . -name gtests && false" secrets: inherit - cudf-wheel-amd64-38: - needs: libcudf-wheel-amd64 - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions + cudf-wheel: + needs: libcudf-wheel + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions with: package-name: cudf package-dir: python/cudf - python-version: "3.8" - cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" + python-version: "3.8 3.9" cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - cibw-test-requires: "cupy-cuda115" - cibw-test-extras: "test" - cibw-test-command: "pytest -v {package}/cudf/tests" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + cibw-test-extras: "test" + gpu-smoketest-before-amd64: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before-arm64: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + 
cibw-test-command: "pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit - #dask_cudf-wheel: - # needs: cudf-wheel-amd64-38 - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions - # with: - # package-name: dask_cudf - # package-dir: python/dask_cudf - # gpu-smoketest-before: "pip install rmm-cuda11 cudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # gpu-smoketest: "import dask_cudf; print(dask_cudf)" - # secrets: inherit - #cudf-wheel-amd64-39: - # needs: libcudf-wheel-amd64 - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-amd64.yml@feat/wheel-ci-actions - # with: - # package-name: cudf - # package-dir: python/cudf - # python-version: "3.9" - # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-build: "protoc 
--proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - # gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - # secrets: inherit - #libcudf-wheel-arm64: - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions - # with: - # package-name: libcudf_cuda11 - # package-dir: cpp - # python-version: "3.8" - # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: 
'-DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - # gpu-smoketest: "" - # auditwheel-repair-override: "cp {wheel} {dest_dir}" - # secrets: inherit - #cudf-wheel-arm64-38: - # needs: libcudf-wheel-arm64 - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions - # with: - # package-name: cudf - # package-dir: python/cudf - # python-version: "3.8" - # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - # gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; 
print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - # secrets: inherit - #cudf-wheel-arm64-39: - # needs: libcudf-wheel-arm64 - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-arm64.yml@feat/wheel-ci-actions - # with: - # package-name: cudf - # package-dir: python/cudf - # python-version: "3.9" - # cibw-environment: "PYTHON_PACKAGE_CUDA_SUFFIX='-cuda11'" - # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - # gpu-smoketest-before: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; 
print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - # secrets: inherit + dask_cudf-wheel: + needs: cudf-wheel + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions + with: + package-name: dask_cudf + package-dir: python/dask_cudf + gpu-smoketest-before: "pip install rmm-cuda11 cudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + cibw-test-extras: "test" + cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" + gpu-smoketest: "import dask_cudf; print(dask_cudf)" + secrets: inherit From 41ff0af3ab1baa0963e8b4d94d700ebc43f8f4fb Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 29 Aug 2022 12:51:41 -0400 Subject: [PATCH 007/142] Add wheel pattern override --- .github/workflows/wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 92754b6c613..d494c71fd4b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -16,6 +16,7 @@ jobs: cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" + wheel-pattern-override: "py3-none-linux" cibw-test-command: "find . 
-name gtests && false" secrets: inherit cudf-wheel: From 4a58cc87ec63ff0b0087ad3f3315fefcf5d4d6aa Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 29 Aug 2022 13:50:35 -0400 Subject: [PATCH 008/142] Run CPP unit tests with _venv_placeholder --- .github/workflows/wheels.yml | 2 +- cpp/tests/CMakeLists.txt | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index d494c71fd4b..cf4b938bb65 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -17,7 +17,7 @@ jobs: skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" wheel-pattern-override: "py3-none-linux" - cibw-test-command: "find . -name gtests && false" + cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" secrets: inherit cudf-wheel: needs: libcudf-wheel diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index e630e842f4e..fa128c1f920 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -36,12 +36,19 @@ function(ConfigureTest CMAKE_TEST_NAME) $ ) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) - install( - TARGETS ${CMAKE_TEST_NAME} - COMPONENT testing - DESTINATION bin/gtests/libcudf - EXCLUDE_FROM_ALL - ) + if(SKBUILD) + install( + TARGETS ${CMAKE_TEST_NAME} + COMPONENT testing + DESTINATION bin/gtests/libcudf) + else() + install( + TARGETS ${CMAKE_TEST_NAME} + COMPONENT testing + DESTINATION bin/gtests/libcudf + EXCLUDE_FROM_ALL + ) + endif() endfunction() # ################################################################################################## From 75657d47abb2dd2303755d39e00165d09252d309 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 29 Aug 2022 14:15:15 -0400 Subject: [PATCH 009/142] Install libprotobuf-dev before 
unit test --- .github/workflows/wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index cf4b938bb65..56ef49dcf78 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -16,6 +16,8 @@ jobs: cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" + gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" + gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" wheel-pattern-override: "py3-none-linux" cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" secrets: inherit From 744c92b8de82963ddabe4b6b8a325e7f29badab5 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 29 Aug 2022 16:08:43 -0400 Subject: [PATCH 010/142] Debug stub issue --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 56ef49dcf78..e5a8c653baa 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -16,8 +16,8 @@ jobs: cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" - gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" - gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" + gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev && nvidia-smi" + gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev && nvidia-smi" 
wheel-pattern-override: "py3-none-linux" cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" secrets: inherit From da7403c6bdb9eac7ef90ee3016d1b62c884eeb01 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 30 Aug 2022 14:37:57 -0400 Subject: [PATCH 011/142] Install tokenizers==0.10.2 to avoid Rust PEP517 compile --- .github/workflows/wheels.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e5a8c653baa..827150e1b35 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -16,8 +16,8 @@ jobs: cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" - gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev && nvidia-smi" - gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev && nvidia-smi" + gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" + gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" wheel-pattern-override: "py3-none-linux" cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" secrets: inherit @@ -32,8 +32,8 @@ jobs: cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" cibw-test-extras: "test" - gpu-smoketest-before-amd64: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest-before-arm64: "pip install requests && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install 
cupy-cuda11x -f https://pip.cupy.dev/aarch64" + gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" cibw-test-command: "pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit From 4323999c414289e4e554f9dc6d1b7605a31452ad Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 30 Aug 2022 15:26:53 -0400 Subject: [PATCH 012/142] Retrigger build --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 827150e1b35..83718e50bc7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -14,7 +14,7 @@ jobs: python-version: "3.8" cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev 
libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DDISABLE_DEPRECATION_WARNING=ON -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" From 738768dda1f39b65a3ae1dc878c52bd5b1f8eb87 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 31 Aug 2022 15:09:37 -0400 Subject: [PATCH 013/142] Build dask_cudf --- .github/workflows/wheels.yml | 64 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 83718e50bc7..26d41b697c3 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,39 +6,39 @@ on: - 'pull-request/[0-9]+' jobs: - libcudf-wheel: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - with: - package-name: libcudf_cuda11 - package-dir: cpp - python-version: "3.8" - cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON 
-DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - auditwheel-skip-repair: "true" - gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" - gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" - wheel-pattern-override: "py3-none-linux" - cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" - secrets: inherit - cudf-wheel: - needs: libcudf-wheel - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - with: - package-name: cudf - package-dir: python/cudf - python-version: "3.8 3.9" - cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - cibw-test-extras: "test" - gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - cibw-test-command: "pytest -v ./python/cudf/cudf/tests" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); 
print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - secrets: inherit + #libcudf-wheel: + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + # with: + # package-name: libcudf_cuda11 + # package-dir: cpp + # python-version: "3.8" + # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + # auditwheel-skip-repair: "true" + # gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" + # gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" + # wheel-pattern-override: "py3-none-linux" + # cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" + # secrets: inherit + #cudf-wheel: + # needs: libcudf-wheel + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + # with: + # package-name: cudf + # package-dir: python/cudf + # python-version: "3.8 3.9" + # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack 
/usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + # cibw-test-extras: "test" + # gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + # gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + # cibw-test-command: "pytest -v ./python/cudf/cudf/tests" + # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + # secrets: inherit dask_cudf-wheel: - needs: cudf-wheel + #needs: cudf-wheel uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions with: package-name: dask_cudf From 
2f5b83ee4c54e55e15b8b586de8915c91e085ee5 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 31 Aug 2022 15:31:54 -0400 Subject: [PATCH 014/142] Rebuild all --- .github/workflows/wheels.yml | 64 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 26d41b697c3..83718e50bc7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,39 +6,39 @@ on: - 'pull-request/[0-9]+' jobs: - #libcudf-wheel: - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - # with: - # package-name: libcudf_cuda11 - # package-dir: cpp - # python-version: "3.8" - # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - # auditwheel-skip-repair: "true" - # gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" - # gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" - # wheel-pattern-override: "py3-none-linux" - # cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" - # secrets: inherit - #cudf-wheel: - # needs: libcudf-wheel - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - # with: - # package-name: cudf - # package-dir: python/cudf - # python-version: "3.8 3.9" - # cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack 
/usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: "-DFIND_CUDF_CPP=ON" - # cibw-test-extras: "test" - # gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - # gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - # cibw-test-command: "pytest -v ./python/cudf/cudf/tests" - # gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - # secrets: inherit + libcudf-wheel: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + with: + package-name: libcudf_cuda11 + package-dir: cpp + python-version: "3.8" + cibw-before-all: 
"apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + auditwheel-skip-repair: "true" + gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" + gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" + wheel-pattern-override: "py3-none-linux" + cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" + secrets: inherit + cudf-wheel: + needs: libcudf-wheel + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + with: + package-name: cudf + package-dir: python/cudf + python-version: "3.8 3.9" + cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + cibw-test-extras: "test" + gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + 
cibw-test-command: "pytest -v ./python/cudf/cudf/tests" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + secrets: inherit dask_cudf-wheel: - #needs: cudf-wheel + needs: cudf-wheel uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions with: package-name: dask_cudf From 72d793b47fabe6a598d1d897f87f5870d36d2d6b Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 1 Sep 2022 15:09:31 -0400 Subject: [PATCH 015/142] Switch from -cuda11 to -cu11 suffix --- .github/workflows/wheels.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 83718e50bc7..411492250fe 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -9,11 +9,11 @@ jobs: libcudf-wheel: uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions with: - package-name: libcudf_cuda11 + package-name: libcudf_cu11 package-dir: cpp python-version: "3.8" cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev 
libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" @@ -29,11 +29,11 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cuda11 libcudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cu11 libcudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "-DFIND_CUDF_CPP=ON" cibw-test-extras: "test" - gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cuda11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url 
https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" cibw-test-command: "pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit @@ -43,7 +43,7 @@ jobs: with: package-name: dask_cudf package-dir: python/dask_cudf - gpu-smoketest-before: "pip install rmm-cuda11 cudf-cuda11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" gpu-smoketest: "import dask_cudf; print(dask_cudf)" From db601d38a656e1bd69e45c1d0d270e7093d1da49 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 1 Sep 2022 15:20:50 -0400 Subject: [PATCH 016/142] Run all gtests for libcudf --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 
411492250fe..e3b66eaf717 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -19,7 +19,7 @@ jobs: gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" wheel-pattern-override: "py3-none-linux" - cibw-test-command: "_venv_placeholder/bin/gtests/libcudf/*" + cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" secrets: inherit cudf-wheel: needs: libcudf-wheel From ea2a171ba84f4128b81a47514cbfca204ca28c6d Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 7 Sep 2022 11:51:49 -0400 Subject: [PATCH 017/142] Change libcudf package-name --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e3b66eaf717..31c55ae8f5c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -9,7 +9,7 @@ jobs: libcudf-wheel: uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions with: - package-name: libcudf_cu11 + package-name: libcudf package-dir: cpp python-version: "3.8" cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" From c66c85d5da86b8af38f445d964424a51bb7948c4 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 7 Sep 2022 15:02:26 -0400 Subject: [PATCH 018/142] Don't auditwheel-repair cudf, see what happens --- .github/workflows/wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 31c55ae8f5c..2fa6c4c4064 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -35,6 +35,7 @@ jobs: gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && 
pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" cibw-test-command: "pytest -v ./python/cudf/cudf/tests" + auditwheel-skip-repair: "true" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: From 6a65eb563725558439a99bb680325c49dc24a58a Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 7 Sep 2022 15:14:34 -0400 Subject: [PATCH 019/142] Don't waste time rebuilding libcudf --- .github/workflows/wheels.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 2fa6c4c4064..c0081d3dac5 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,23 +6,23 @@ on: - 'pull-request/[0-9]+' jobs: - libcudf-wheel: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - with: - package-name: libcudf - package-dir: cpp - 
python-version: "3.8" - cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - auditwheel-skip-repair: "true" - gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" - gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" - wheel-pattern-override: "py3-none-linux" - cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" - secrets: inherit + #libcudf-wheel: + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + # with: + # package-name: libcudf + # package-dir: cpp + # python-version: "3.8" + # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + # auditwheel-skip-repair: "true" + # gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" + # gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" + # wheel-pattern-override: "py3-none-linux" + # cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" + # secrets: inherit cudf-wheel: - needs: 
libcudf-wheel + #needs: libcudf-wheel uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions with: package-name: cudf From bf9a7543870c133bdddfe05232428bbb9be26f9b Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 7 Sep 2022 16:25:51 -0400 Subject: [PATCH 020/142] Uncomment libcudf --- .github/workflows/wheels.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index c0081d3dac5..2fa6c4c4064 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,23 +6,23 @@ on: - 'pull-request/[0-9]+' jobs: - #libcudf-wheel: - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - # with: - # package-name: libcudf - # package-dir: cpp - # python-version: "3.8" - # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - # auditwheel-skip-repair: "true" - # gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" - # gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" - # wheel-pattern-override: "py3-none-linux" - # cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" - # secrets: inherit + libcudf-wheel: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + with: + package-name: libcudf + package-dir: cpp + python-version: "3.8" + cibw-before-all: "apt-get 
install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + auditwheel-skip-repair: "true" + gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" + gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" + wheel-pattern-override: "py3-none-linux" + cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" + secrets: inherit cudf-wheel: - #needs: libcudf-wheel + needs: libcudf-wheel uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions with: package-name: cudf From 60e4517b8a1c9f0013e2ca00480a5e61a348cf96 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 7 Sep 2022 17:58:14 -0400 Subject: [PATCH 021/142] Don't need to unpack python static libs if avoiding building pyarrow from source --- .github/workflows/wheels.yml | 3 +-- cpp/cmake/thirdparty/get_arrow.cmake | 5 ----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 2fa6c4c4064..caa92c6867f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -12,7 +12,7 @@ jobs: package-name: libcudf package-dir: cpp python-version: "3.8" - cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev 
libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler" cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' auditwheel-skip-repair: "true" @@ -35,7 +35,6 @@ jobs: gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" cibw-test-command: "pytest -v ./python/cudf/cudf/tests" - auditwheel-skip-repair: "true" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 3a008db45d7..9fa5b9d1658 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ 
b/cpp/cmake/thirdparty/get_arrow.cmake @@ -312,11 +312,6 @@ if(NOT DEFINED CUDF_VERSION_Arrow) ) endif() -if(SKBUILD) - message(VERBOSE "calling find_package(Python3) with correct arguments to help arrow find it later on") - find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED) -endif() - find_and_configure_arrow( ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} From 00a17510000e14c490f79865b47260a7daecc085 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Mon, 12 Sep 2022 16:51:54 -0700 Subject: [PATCH 022/142] WIP --- cpp/CMakeLists.txt | 142 +- cpp/cmake/thirdparty/get_arrow.cmake | 65 +- cpp/include/cudf/detail/interop.hpp | 2 +- cpp/libcudf/__init__.py | 0 cpp/libcudf/_version.py | 567 ----- cpp/pyproject.toml | 24 - cpp/setup.cfg | 15 - cpp/setup.py | 64 - cpp/src/interop/to_arrow.cu | 6 +- cpp/versioneer.py | 1904 ----------------- python/cudf/CMakeLists.txt | 61 +- python/cudf/cmake/Modules/WheelHelpers.cmake | 74 + python/cudf/cudf/_lib/CMakeLists.txt | 5 +- python/cudf/cudf/_lib/interop.pyx | 16 +- python/cudf/cudf/_lib/io/CMakeLists.txt | 4 +- python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 4 +- python/cudf/cudf/_lib/strings/CMakeLists.txt | 4 +- .../cudf/_lib/strings/convert/CMakeLists.txt | 4 +- .../cudf/_lib/strings/split/CMakeLists.txt | 4 +- python/cudf/pyproject.toml | 1 + python/cudf/setup.py | 25 +- python/dask_cudf/setup.py | 2 +- 22 files changed, 296 insertions(+), 2697 deletions(-) delete mode 100644 cpp/libcudf/__init__.py delete mode 100644 cpp/libcudf/_version.py delete mode 100644 cpp/pyproject.toml delete mode 100644 cpp/setup.cfg delete mode 100644 cpp/setup.py delete mode 100644 cpp/versioneer.py create mode 100644 python/cudf/cmake/Modules/WheelHelpers.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c84589af345..d2e7372cc97 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -48,6 +48,7 @@ 
option(BUILD_TESTS "Configure CMake to build tests" ON) option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" OFF) option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) +option(CUDF_BUILD_WHEELS "Whether we're building libcudf to go in a wheel for pypi" OFF) option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON) option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF) @@ -136,12 +137,14 @@ set(CUDA_SANITIZER_COMMAND_OPTIONS "--tool memcheck") # find zlib rapids_find_package(ZLIB REQUIRED) -# find Threads (needed by cudftestutil) -rapids_find_package( - Threads REQUIRED - BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports -) +if(NOT CUDF_BUILD_WHEELS) + # find Threads (needed by cudftestutil) + rapids_find_package( + Threads REQUIRED + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports + ) +endif() # add third party dependencies using CPM rapids_cpm_init() @@ -163,7 +166,9 @@ rapids_cpm_libcudacxx(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-expo # find cuCollections Should come after including thrust and libcudacxx include(cmake/thirdparty/get_cucollections.cmake) # find or install GoogleTest -include(cmake/thirdparty/get_gtest.cmake) +if(NOT CUDF_BUILD_WHEELS) + include(cmake/thirdparty/get_gtest.cmake) +endif() # preprocess jitify-able kernels include(cmake/Modules/JitifyPreprocessKernels.cmake) # find cuFile @@ -685,47 +690,46 @@ add_library(cudf::cudf ALIAS cudf) # ################################################################################################## # * build cudftestutil ---------------------------------------------------------------------------- -add_library( - cudftestutil STATIC - tests/io/metadata_utilities.cpp - tests/quantiles/tdigest_utilities.cu - tests/utilities/base_fixture.cpp - 
tests/utilities/column_utilities.cu - tests/utilities/table_utilities.cu - tests/strings/utilities.cpp -) +if(NOT CUDF_BUILD_WHEELS) -set_target_properties( - cudftestutil - PROPERTIES BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON -) + add_library( + cudftestutil STATIC + tests/io/metadata_utilities.cpp + tests/quantiles/tdigest_utilities.cu + tests/utilities/base_fixture.cpp + tests/utilities/column_utilities.cu + tests/utilities/table_utilities.cu + tests/strings/utilities.cpp + ) -target_compile_options( - cudftestutil PUBLIC "$:${CUDF_CXX_FLAGS}>>" - "$:${CUDF_CUDA_FLAGS}>>" -) + set_target_properties( + cudftestutil + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + ) -target_link_libraries( - cudftestutil - PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf - PRIVATE $ -) + target_compile_options( + cudftestutil PUBLIC "$:${CUDF_CXX_FLAGS}>>" + "$:${CUDF_CUDA_FLAGS}>>" + ) -target_include_directories( - cudftestutil PUBLIC "$" - "$" -) + target_link_libraries(cudftestutil PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf) -add_library(cudf::cudftestutil ALIAS cudftestutil) + target_include_directories( + cudftestutil PUBLIC "$" + "$" + ) + add_library(cudf::cudftestutil ALIAS cudftestutil) + +endif() # ################################################################################################## # * add tests ------------------------------------------------------------------------------------- @@ -780,24 +784,26 @@ install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cud ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION 
${CMAKE_INSTALL_INCLUDEDIR} ) -install( - TARGETS cudftestutil - DESTINATION ${lib_dir} - EXPORT cudf-testing-exports -) +if(NOT CUDF_BUILD_WHEELS) + install( + TARGETS cudftestutil + DESTINATION ${lib_dir} + EXPORT cudf-testing-exports + ) -install( - EXPORT cudf-testing-exports - FILE cudf-testing-targets.cmake - NAMESPACE cudf:: - DESTINATION "${lib_dir}/cmake/cudf" -) + install( + EXPORT cudf-testing-exports + FILE cudf-testing-targets.cmake + NAMESPACE cudf:: + DESTINATION "${lib_dir}/cmake/cudf" + ) -include("${rapids-cmake-dir}/export/write_dependencies.cmake") -rapids_export_write_dependencies( - INSTALL cudf-testing-exports - "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake" -) + include("${rapids-cmake-dir}/export/write_dependencies.cmake") + rapids_export_write_dependencies( + INSTALL cudf-testing-exports + "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake" + ) +endif() set(doc_string [=[ @@ -887,6 +893,7 @@ if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") endif() ]=] ) + string(APPEND build_code_string "${common_code_string}") rapids_export( @@ -898,15 +905,16 @@ rapids_export( FINAL_CODE_BLOCK build_code_string ) -export( - EXPORT cudf-testing-exports - FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake - NAMESPACE cudf:: -) -rapids_export_write_dependencies( - BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake" -) - +if(NOT CUDF_BUILD_WHEELS) + export( + EXPORT cudf-testing-exports + FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake + NAMESPACE cudf:: + ) + rapids_export_write_dependencies( + BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake" + ) +endif() # ################################################################################################## # * make documentation ---------------------------------------------------------------------------- diff --git a/cpp/cmake/thirdparty/get_arrow.cmake 
b/cpp/cmake/thirdparty/get_arrow.cmake index 9fa5b9d1658..a915a7d59a5 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -20,35 +20,58 @@ # cmake-lint: disable=R0912,R0913,R0915 +include_guard(GLOBAL) + +function(find_libarrow_in_python_wheel VERSION) + function(find_arrow_lib _name _alias _lib) + if(CUDF_PYARROW_WHEEL_DIR) + list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") + endif() + rapids_find_generate_module( + "${_name}" + NO_CONFIG + VERSION "${VERSION}" + LIBRARY_NAMES "${_lib}" + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports + HEADER_NAMES arrow/python/arrow_to_pandas.h + ) + + find_package(${_name} ${VERSION} MODULE REQUIRED GLOBAL) + add_library(${_alias} ALIAS ${_name}::${_name}) + + if(CUDF_PYARROW_WHEEL_DIR) + list(POP_BACK CMAKE_PREFIX_PATH) + endif() + endfunction() + + string(REPLACE "." "" PYARROW_SO_VER "${VERSION}") + find_arrow_lib(Arrow arrow_shared libarrow.so.${PYARROW_SO_VER}) +endfunction() + # This function finds arrow and sets any additional necessary environment variables. 
function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON ENABLE_PARQUET ) + if(CUDF_BUILD_WHEELS AND (NOT BUILD_STATIC)) + # Generate a FindArrow.cmake to find pyarrow's libarrow.so + find_libarrow_in_python_wheel(${VERSION}) + set(ARROW_FOUND TRUE PARENT_SCOPE) + set(ARROW_LIBRARIES arrow_shared PARENT_SCOPE) + return() + endif() + if(BUILD_STATIC) if(TARGET arrow_static) - list(APPEND ARROW_LIBRARIES arrow_static) - set(ARROW_FOUND - TRUE - PARENT_SCOPE - ) - set(ARROW_LIBRARIES - ${ARROW_LIBRARIES} - PARENT_SCOPE - ) + set(ARROW_FOUND TRUE PARENT_SCOPE) + set(ARROW_LIBRARIES arrow_static PARENT_SCOPE) return() endif() else() if(TARGET arrow_shared) - list(APPEND ARROW_LIBRARIES arrow_shared) - set(ARROW_FOUND - TRUE - PARENT_SCOPE - ) - set(ARROW_LIBRARIES - ${ARROW_LIBRARIES} - PARENT_SCOPE - ) + set(ARROW_FOUND TRUE PARENT_SCOPE) + set(ARROW_LIBRARIES arrow_shared PARENT_SCOPE) return() endif() endif() @@ -92,6 +115,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB rapids_cpm_find( Arrow ${VERSION} GLOBAL_TARGETS arrow_shared parquet_shared arrow_dataset_shared + arrow_static parquet_static arrow_dataset_static CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} @@ -128,8 +152,8 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB set(ARROW_FOUND TRUE) set(ARROW_LIBRARIES "") - # Arrow_ADDED: set if CPM downloaded Arrow from Github Arrow_DIR: set if CPM found Arrow on the - # system/conda/etc. + # Arrow_ADDED: set if CPM downloaded Arrow from Github + # Arrow_DIR: set if CPM found Arrow on the system/conda/etc. 
if(Arrow_ADDED OR Arrow_DIR) if(BUILD_STATIC) list(APPEND ARROW_LIBRARIES arrow_static) @@ -302,7 +326,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB "${ARROW_LIBRARIES}" PARENT_SCOPE ) - endfunction() if(NOT DEFINED CUDF_VERSION_Arrow) diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 1417be358de..babee008c23 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -49,7 +49,7 @@ DLManagedTensor* to_dlpack( // Creating arrow as per given type_id and buffer arguments template -std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... args) +inline std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... args) { switch (id) { case type_id::BOOL8: return std::make_shared(std::forward(args)...); diff --git a/cpp/libcudf/__init__.py b/cpp/libcudf/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/cpp/libcudf/_version.py b/cpp/libcudf/_version.py deleted file mode 100644 index c265a22b162..00000000000 --- a/cpp/libcudf/_version.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. 
- # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "v" - cfg.parentdir_prefix = "libcudf-" - cfg.versionfile_source = "libcudf/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - 
stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords( - get_keywords(), cfg.tag_prefix, verbose - ) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. 
Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/cpp/pyproject.toml b/cpp/pyproject.toml deleted file mode 100644 index 12777bb77bc..00000000000 --- a/cpp/pyproject.toml +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[build-system] - -requires = [ - "wheel", - "setuptools", - "scikit-build>=0.13.1", - "cmake>=3.20.1,!=3.23.0", - "ninja", - "numpy" -] diff --git a/cpp/setup.cfg b/cpp/setup.cfg deleted file mode 100644 index ea1c7dab305..00000000000 --- a/cpp/setup.cfg +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. - -[flake8] -exclude = versioneer.py -# See the docstring in versioneer.py for instructions. 
Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -VCS = git -style = pep440 -versionfile_source = libcudf/_version.py -versionfile_build = libcudf/_version.py -tag_prefix = v -parentdir_prefix = libcudf- diff --git a/cpp/setup.py b/cpp/setup.py deleted file mode 100644 index ef60e9833de..00000000000 --- a/cpp/setup.py +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from setuptools import find_packages -from skbuild import setup -from wheel.bdist_wheel import bdist_wheel as _bdist_wheel - -import versioneer -import os - - -''' -copy this trick from https://github.com/ssciwr/clang-format-wheel/blob/main/setup.py -since the C++ code compiled by this cpp module is not a Python C extension -override the platform to be py3-none -''' -class genericpy_bdist_wheel(_bdist_wheel): - def finalize_options(self): - _bdist_wheel.finalize_options(self) - self.root_is_pure = False - - def get_tag(self): - python, abi, plat = _bdist_wheel.get_tag(self) - python, abi = "py3", "none" - return python, abi, plat - - -cmdclass = versioneer.get_cmdclass() -cmdclass['bdist_wheel'] = genericpy_bdist_wheel - - -def exclude_libcxx_symlink(cmake_manifest): - return list(filter(lambda name: not ('include/rapids/libcxx/include' in name), cmake_manifest)) - - -setup(name='libcudf'+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), - description="cuDF C++ library", - version=versioneer.get_version(), - classifiers=[ - "Intended Audience :: Developers", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9" - ], - author="NVIDIA Corporation", - cmake_process_manifest_hook=exclude_libcxx_symlink, - packages=find_packages(include=['libcudf']), - license="Apache", - cmdclass=cmdclass, - zip_safe=False - ) diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index eeb27c2ac05..8c3014e3799 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -398,9 +398,11 @@ std::shared_ptr to_arrow(table_view input, arrays.end(), metadata.begin(), std::back_inserter(fields), - [](auto const& array, auto const& meta) { return arrow::field(meta.name, array->type()); }); + [](auto const& array, auto const& meta) { + return std::make_shared(meta.name, array->type()); + }); - auto result = arrow::Table::Make(arrow::schema(fields), arrays); + auto result = 
arrow::Table::Make(arrow::schema(fields), arrays, input.num_rows()); // synchronize the stream because after the return the data may be accessed from the host before // the above `cudaMemcpyAsync` calls have completed their copies (especially if pinned host diff --git a/cpp/versioneer.py b/cpp/versioneer.py deleted file mode 100644 index a6537a34ede..00000000000 --- a/cpp/versioneer.py +++ /dev/null @@ -1,1904 +0,0 @@ -# Version: 0.18 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. 
- - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) -* run `versioneer install` in your source tree, commit the results - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. 
- -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. 
- -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from cudf._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. 
For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other langauges) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. 
However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. 
Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . 
Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -""" - -from __future__ import print_function - -import errno -import json -import os -import re -import subprocess -import sys - -try: - import configparser -except ImportError: - import ConfigParser as configparser - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ( - "Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND')." 
- ) - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - me = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(me)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print( - "Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py) - ) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise EnvironmentError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() - with open(setup_cfg, "r") as f: - parser.readfp(f) - VCS = parser.get("versioneer", "VCS") # mandatory - - def get(parser, name): - if parser.has_option("versioneer", name): - return parser.get("versioneer", name) - return None - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = get(parser, "style") or "" - cfg.versionfile_source = get(parser, "versionfile_source") - cfg.versionfile_build = get(parser, "versionfile_build") - cfg.tag_prefix = get(parser, "tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = get(parser, "parentdir_prefix") - cfg.verbose = get(parser, "verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command( - commands, args, cwd=None, verbose=False, hide_stderr=False, env=None -): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen( - [c] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - ) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = 
p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -LONG_VERSION_PY[ - "git" -] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = 
stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%%s*" %% tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r"\d", r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command( - GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True - ) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command( - GITS, - [ - "describe", - "--tags", - "--dirty", - "--always", - "--long", - "--match", - "%s*" % tag_prefix, - ], - cwd=root, - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe 
improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ( - "unable to parse git-describe output: '%s'" % describe_out - ) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( - full_tag, - tag_prefix, - ) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command( - GITS, ["rev-list", "HEAD", "--count"], cwd=root - ) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ - 0 - ].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. 
- -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except EnvironmentError: - raise NotThisMethod("unable to read _version.py") - mo = re.search( - r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - mo = re.search( - r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, - re.M | re.S, - ) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps( - versions, sort_keys=True, indent=1, separators=(",", ": ") - ) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert ( - cfg.versionfile_source is not None - ), "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - 
print("unable to compute version") - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/warner/python-versioneer/issues/52 - - cmds = {} - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? 
- - # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join( - self.build_lib, cfg.versionfile_build - ) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - cmds["build_py"] = cmd_build_py - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if "py2exe" in sys.modules: # py2exe enabled? 
- try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file( - target_versionfile, self._versioneer_generated_versions - ) - - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. 
You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -INIT_PY_SNIPPET = """ -from cudf._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except ( - EnvironmentError, - configparser.NoSectionError, - configparser.NoOptionError, - ) as e: - if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print( - "Adding sample versioneer config to setup.cfg", file=sys.stderr - ) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as 
f: - old = f.read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print( - " appending versionfile_source ('%s') to MANIFEST.in" - % cfg.versionfile_source - ) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 9762eacbbed..2ad03d7f7d4 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -14,10 +14,22 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) -set(cudf_version 22.10.00) +if(POLICY CMP0094) + # Make FindPython{,2,3} modules use LOCATION for lookup strategy. 
+ # https://cmake.org/cmake/help/latest/policy/CMP0094.html#policy:CMP0094 + cmake_policy(SET CMP0094 NEW) + set(CMAKE_POLICY_DEFAULT_CMP0094 NEW) +endif() include(../../fetch_rapids.cmake) +include(rapids-cuda) +# TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required +# languages for the C++ project even if this project does not require those languges. +rapids_cuda_init_architectures(cudf-python) + +set(cudf_version 22.10.00) + project( cudf-python VERSION ${cudf_version} @@ -26,37 +38,56 @@ project( # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. C CXX + # TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required + # languages for the C++ project even if this project does not require those languges. + CUDA ) option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" OFF ) +option(CUDF_BUILD_WHEELS "Whether we're building a wheel for pypi" OFF) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) find_package(cudf ${cudf_version} REQUIRED) else() - set(cudf_FOUND OFF) -endif() - -if(NOT cudf_FOUND) - # TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required - # languages for the C++ project even if this project does not require those languges. - include(rapids-cuda) - rapids_cuda_init_architectures(cudf-python) - enable_language(CUDA) - # Since cudf only enables CUDA optionally we need to manually include the file that - # rapids_cuda_init_architectures relies on `project` including. 
- include("${CMAKE_PROJECT_cudf-python_INCLUDE}") set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) - add_subdirectory(../../cpp cudf-cpp) + # Statically link dependencies if building wheels + set(CPM_DOWNLOAD_rmm ${CUDF_BUILD_WHEELS}) + set(CPM_DOWNLOAD_spdlog ${CUDF_BUILD_WHEELS}) + set(CUDA_STATIC_RUNTIME ${CUDF_BUILD_WHEELS}) + + # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp + # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL + set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ${CUDF_BUILD_WHEELS}) + + set(_exclude_from_all "") + if(CUDF_BUILD_WHEELS) + # Don't install the cuDF C++ targets into wheels + set(_exclude_from_all EXCLUDE_FROM_ALL) + endif() + + add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all}) + + if(CUDF_BUILD_WHEELS) + include(cmake/Modules/WheelHelpers.cmake) + get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES) + # Ensure all the shared objects we need at runtime are in the wheel + add_target_libs_to_wheel( + LIB_DIR cudf/_lib + TARGETS arrow_shared + nvcomp::nvcomp + ${_nvcomp_link_libs} + ) + endif() # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the # library in the _lib/cpp directory as a single source of truth and modify the other rpaths # appropriately. - install(TARGETS cudf DESTINATION cudf/_lib/cpp) + install(TARGETS cudf DESTINATION cudf/_lib) endif() include(rapids-cython) diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake new file mode 100644 index 00000000000..882cc915641 --- /dev/null +++ b/python/cudf/cmake/Modules/WheelHelpers.cmake @@ -0,0 +1,74 @@ +#============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= +include_guard(GLOBAL) + +function(add_target_libs_to_wheel) + list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel") + + set(options "") + set(one_value "LIB_DIR") + set(multi_value "TARGETS") + cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN}) + + message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'") + + foreach(target IN LISTS __TARGETS) + + if(NOT TARGET ${target}) + message(VERBOSE "No target named ${target}") + continue() + endif() + + get_target_property(alias_target ${target} ALIASED_TARGET) + if(alias_target) + set(target ${alias_target}) + endif() + + get_target_property(is_imported ${target} IMPORTED) + if(NOT is_imported) + # If the target isn't imported, install it into the the wheel + install(TARGETS ${target} DESTINATION ${__LIB_DIR}) + message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})") + else() + # If the target is imported, make sure it's global + get_target_property(already_global ${target} IMPORTED_GLOBAL) + if(NOT already_global) + set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) + endif() + + # Find the imported target's library so we can copy it into the wheel + set(lib_loc) + foreach(prop IN ITEMS IMPORTED_LOCATION + IMPORTED_LOCATION_RELEASE + IMPORTED_LOCATION_DEBUG) + get_target_property(lib_loc ${target} ${prop}) + 
if(lib_loc) + message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}") + break() + endif() + message(VERBOSE "${target} has no value for property ${prop}") + endforeach() + + if(NOT lib_loc) + message(FATAL_ERROR "Found no libs to install for target ${target}") + endif() + + # Copy the imported library into the wheel + install(FILES ${lib_loc} DESTINATION ${__LIB_DIR}) + message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})") + endif() + endforeach() +endfunction() diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 1f6b2069b49..e0f837a2da1 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -54,6 +54,7 @@ set(cython_sources utils.pyx ) set(linked_libraries cudf::cudf) + rapids_cython_create_modules( CXX SOURCE_FILES "${cython_sources}" @@ -69,8 +70,8 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp") +foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN") endforeach() add_subdirectory(io) diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index ee5ce165f95..a4b8193a61c 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -160,7 +160,21 @@ def to_arrow(list source_columns, object column_dtypes): input_table_view, cpp_metadata ) - return pyarrow_wrap_table(cpp_arrow_table) + print("names = cpp_arrow_table.get().ColumnNames():") + names = cpp_arrow_table.get().ColumnNames() + [print(name.decode('UTF-8')) for name in names] + + print("table = pyarrow_wrap_table(cpp_arrow_table)") + table = pyarrow_wrap_table(cpp_arrow_table) + print("table.column_names:") + 
print(table.column_names) + print("field = table.field(0):") + field = table.field(0) + print("field.name:") + print(field.name) + print("table.to_string():") + print(table.to_string()) + return table def from_arrow(object input_table): diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index b12b085fc76..81ac8fc02d8 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -25,6 +25,6 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") +foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index d96999a077e..7a9abcc029c 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -22,6 +22,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ) -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") +foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index 8ed5c5e03c1..c5430d87d73 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -41,8 +41,8 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN 
LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp") +foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() add_subdirectory(convert) diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index ea2e3943b5a..e711bbc08cb 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -23,6 +23,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") +foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index 2d23c0d21cb..e89f3104eee 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -21,6 +21,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp") +foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../") endforeach() diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index eac267f2ce4..b2e3a69c6c5 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -11,4 +11,5 @@ requires = [ "ninja", "numpy", "pyarrow==9.0.0", + 
"protoc-wheel" ] diff --git a/python/cudf/setup.py b/python/cudf/setup.py index d0c213aaf5c..009b55efaa0 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -17,7 +17,7 @@ "cachetools", "cuda-python>=11.5,<11.7.1", "fsspec>=0.6.0", - "numba>=0.53.1", + "numba>=0.54", "numpy", "nvtx>=0.2.1", "packaging", @@ -133,6 +133,18 @@ def run(self): cmdclass = versioneer.get_cmdclass() cmdclass["build_ext"] = build_ext_and_proto +cmake_args=[] + +if os.getenv("CUDF_BUILD_WHEELS", "") != "": + import pyarrow as pa + cmake_args=[ + "-DCUDF_BUILD_WHEELS=ON", + f"-DCUDF_PYARROW_WHEEL_DIR={pa.__path__[0]}", + ] + +def exclude_libcxx_symlink(cmake_manifest): + return list(filter(lambda name: not ('include/rapids/libcxx/include' in name), cmake_manifest)) + setup( name="cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), version=versioneer.get_version(), @@ -149,12 +161,19 @@ def run(self): "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", ], + cmake_args=cmake_args, + cmake_process_manifest_hook=exclude_libcxx_symlink, packages=find_packages(include=["cudf", "cudf.*"]), package_data={ key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, - cmdclass=cmdclass, - install_requires=install_requires, + setup_requires=[ + f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + ], + install_requires=install_requires + [ + f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + ], extras_require=extras_require, + cmdclass=cmdclass, zip_safe=False, ) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 50fb1d515ce..93f22a8134b 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -21,7 +21,7 @@ "numpy", "pandas>=1.0,<1.6.0dev0", "pytest", - "numba>=0.53.1", + "numba>=0.54", "dask>=2021.09.1", "distributed>=2021.09.1", ] From 04a0ac6f858499242c4042df20a5c33b34a73fb9 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Mon, 12 Sep 2022 17:52:40 -0700 Subject: [PATCH 023/142] remove 
print debugging --- python/cudf/cudf/_lib/interop.pyx | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index a4b8193a61c..ee5ce165f95 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -160,21 +160,7 @@ def to_arrow(list source_columns, object column_dtypes): input_table_view, cpp_metadata ) - print("names = cpp_arrow_table.get().ColumnNames():") - names = cpp_arrow_table.get().ColumnNames() - [print(name.decode('UTF-8')) for name in names] - - print("table = pyarrow_wrap_table(cpp_arrow_table)") - table = pyarrow_wrap_table(cpp_arrow_table) - print("table.column_names:") - print(table.column_names) - print("field = table.field(0):") - field = table.field(0) - print("field.name:") - print(field.name) - print("table.to_string():") - print(table.to_string()) - return table + return pyarrow_wrap_table(cpp_arrow_table) def from_arrow(object input_table): From d8f7b3cd5aa615bfa1827c1556e7b85836ce7657 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 10:57:02 -0700 Subject: [PATCH 024/142] use the old ABI because arrow --- python/cudf/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 2ad03d7f7d4..e0c2b582732 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -61,6 +61,14 @@ else() set(CPM_DOWNLOAD_spdlog ${CUDF_BUILD_WHEELS}) set(CUDA_STATIC_RUNTIME ${CUDF_BUILD_WHEELS}) + if(CUDF_BUILD_WHEELS) + # Wheels have to build with the old ABI because pyarrow's `libarrow.so` + # is compiled for manylinux2014 (centos7 toolchain) which uses the old ABI. 
+ list(APPEND CMAKE_C_FLAGS -D_GLIBCXX_USE_CXX11_ABI=0) + list(APPEND CMAKE_CXX_FLAGS -D_GLIBCXX_USE_CXX11_ABI=0) + list(APPEND CMAKE_CUDA_FLAGS -Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0) + endif() + # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ${CUDF_BUILD_WHEELS}) From c95738cf29f6cdb4e241e0c52829122220629cae Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 13:44:50 -0700 Subject: [PATCH 025/142] update github action to only build cudf wheel --- .github/workflows/wheels.yml | 49 ++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index caa92c6867f..cfa28f50225 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,31 +6,36 @@ on: - 'pull-request/[0-9]+' jobs: - libcudf-wheel: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - with: - package-name: libcudf - package-dir: cpp - python-version: "3.8" - cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler" - cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - auditwheel-skip-repair: "true" - gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" - gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" - wheel-pattern-override: "py3-none-linux" - cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" - secrets: inherit + # 
libcudf-wheel: + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + # with: + # package-name: libcudf + # package-dir: cpp + # python-version: "3.8" + # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler" + # cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + # skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' + # auditwheel-skip-repair: "true" + # gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" + # gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" + # wheel-pattern-override: "py3-none-linux" + # cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" + # secrets: inherit cudf-wheel: - needs: libcudf-wheel - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + uses: trxcllnt/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions with: package-name: cudf package-dir: python/cudf python-version: "3.8 3.9" - cibw-before-all: "apt-get install -y protobuf-compiler && git clone https://github.com/dmlc/dlpack -b v0.5 /opt/dlpack && cp -r /opt/dlpack/include/dlpack /usr/include/dlpack && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-build: "protoc --proto_path={package}/cudf/utils/metadata --python_out={package}/cudf/utils/metadata {package}/cudf/utils/metadata/orc_column_statistics.proto && pip install rmm-cu11 libcudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: "-DFIND_CUDF_CPP=ON" + cibw-before-all: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + 
cibw-environment: | + "CUDF_BUILD_WHEELS=1" + "PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + skbuild-configure-options: | + "--log-level=DEBUG" + "-DCUDF_BUILD_WHEELS=ON" + "-DDETECT_CONDA_ENV=OFF" + "-DCMAKE_CUDA_ARCHITECTURES=ALL" cibw-test-extras: "test" gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" @@ -39,10 +44,12 @@ jobs: secrets: inherit dask_cudf-wheel: needs: cudf-wheel - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions + uses: trxcllnt/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions with: package-name: dask_cudf package-dir: python/dask_cudf + cibw-environment: | + "PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" From fae72deecf5f8c8fa7a992cab2d24ec613381173 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 15:28:50 -0700 Subject: [PATCH 026/142] set include_package_data=True for scikit-build --- python/cudf/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 009b55efaa0..14b238987e6 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -164,6 +164,7 @@ def exclude_libcxx_symlink(cmake_manifest): cmake_args=cmake_args, cmake_process_manifest_hook=exclude_libcxx_symlink, packages=find_packages(include=["cudf", "cudf.*"]), + include_package_data=True, package_data={ key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, From aa42c1417c8598e5e49cd3a14cd7128316e35207 Mon Sep 17 00:00:00 2001 From: ptaylor Date: 
Wed, 14 Sep 2022 16:55:45 -0700 Subject: [PATCH 027/142] remove cibw-environment from workflow that doesn't define it --- .github/workflows/wheels.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index cfa28f50225..48c82e01f8b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -48,8 +48,6 @@ jobs: with: package-name: dask_cudf package-dir: python/dask_cudf - cibw-environment: | - "PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" From 430523372205198784d2d717b49dfb88dbe37d3f Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 16:56:12 -0700 Subject: [PATCH 028/142] add cudf to dask_cudf's install_requires --- python/dask_cudf/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 93f22a8134b..ed2866da8ef 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -14,6 +14,7 @@ "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", + f"cudf{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", ] extras_require = { From d463e1a62a2c6bede257567495dccc394c15c459 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 17:11:47 -0700 Subject: [PATCH 029/142] switch action repo back to rapidsai --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 48c82e01f8b..c3b564aedda 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -22,7 +22,7 @@ jobs: # cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" # secrets: inherit cudf-wheel: - uses: 
trxcllnt/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-ptaylor with: package-name: cudf package-dir: python/cudf @@ -44,7 +44,7 @@ jobs: secrets: inherit dask_cudf-wheel: needs: cudf-wheel - uses: trxcllnt/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions with: package-name: dask_cudf package-dir: python/dask_cudf From 544b341ee1c1ba2155ad3111f79e64cc59c457b2 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 17:21:27 -0700 Subject: [PATCH 030/142] pass env on a single line --- .github/workflows/wheels.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index c3b564aedda..9dab21bccbd 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -28,9 +28,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" cibw-before-all: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: | - "CUDF_BUILD_WHEELS=1" - "PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: | "--log-level=DEBUG" "-DCUDF_BUILD_WHEELS=ON" From db0487fb87f548652c5ff93ea4c0a5d0bbeb98c4 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 17:27:54 -0700 Subject: [PATCH 031/142] single line all the things --- .github/workflows/wheels.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 9dab21bccbd..0dcd5fee12f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -29,11 +29,7 @@ jobs: python-version: "3.8 3.9" cibw-before-all: "tar -xf 
/opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" - skbuild-configure-options: | - "--log-level=DEBUG" - "-DCUDF_BUILD_WHEELS=ON" - "-DDETECT_CONDA_ENV=OFF" - "-DCMAKE_CUDA_ARCHITECTURES=ALL" + skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" cibw-test-extras: "test" gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" From 50fc10ac5f4f8bc0fa560d9068b0d936ec4c3a1d Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 20:49:37 -0700 Subject: [PATCH 032/142] set python-package-cuda-suffix for dask-cudf --- .github/workflows/wheels.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 0dcd5fee12f..b74c4ba4eab 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -38,10 +38,11 @@ jobs: secrets: inherit dask_cudf-wheel: needs: cudf-wheel - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-ptaylor with: package-name: dask_cudf package-dir: python/dask_cudf + python-package-cuda-suffix: "-cu11" gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" From fd4fd49bb47b686318f93084183b8533c8ed4c87 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 20:50:00 -0700 Subject: [PATCH 033/142] install protoc from apt 
--- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b74c4ba4eab..07143e04425 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -27,7 +27,7 @@ jobs: package-name: cudf package-dir: python/cudf python-version: "3.8 3.9" - cibw-before-all: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all: "apt-get install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" cibw-test-extras: "test" From 90397006c85f2bb70bf3bb63d2809658847aaf59 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Sep 2022 23:59:37 -0700 Subject: [PATCH 034/142] check if multiple copies of libcuda.so are in the image on the CI runner --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 07143e04425..31c2e0ef486 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -33,7 +33,7 @@ jobs: cibw-test-extras: "test" gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - cibw-test-command: "pytest -v ./python/cudf/cudf/tests" + cibw-test-command: "find / -type f -name 'libcuda.so*' && pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 
'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: @@ -45,6 +45,6 @@ jobs: python-package-cuda-suffix: "-cu11" gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" - cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" + cibw-test-command: "find / -type f -name 'libcuda.so*' && pytest -v ./python/dask_cudf/dask_cudf/tests" gpu-smoketest: "import dask_cudf; print(dask_cudf)" secrets: inherit From 8ac6309f3fb06b86cce6d3728e6f29a46cd17449 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 00:51:11 -0700 Subject: [PATCH 035/142] add syntax for protoc --- .github/workflows/wheels.yml | 2 +- python/cudf/cudf/utils/metadata/orc_column_statistics.proto | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 31c2e0ef486..999a5e539cd 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -27,7 +27,7 @@ jobs: package-name: cudf package-dir: python/cudf python-version: "3.8 3.9" - cibw-before-all: "apt-get install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz 
-C /opt/_internal" + cibw-before-all: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" cibw-test-extras: "test" diff --git a/python/cudf/cudf/utils/metadata/orc_column_statistics.proto b/python/cudf/cudf/utils/metadata/orc_column_statistics.proto index 9dbaa713c03..1bc0fa6f6bd 100644 --- a/python/cudf/cudf/utils/metadata/orc_column_statistics.proto +++ b/python/cudf/cudf/utils/metadata/orc_column_statistics.proto @@ -1,3 +1,5 @@ +syntax = "proto2"; + message IntegerStatistics { optional sint64 minimum = 1; optional sint64 maximum = 2; From 5d1770c4e6fdfdd52a9f56f84fa73193813d70d5 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 00:56:43 -0700 Subject: [PATCH 036/142] print ldconfig, set ld_library_path before running tests --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 999a5e539cd..cd699b4c8c2 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -33,7 +33,7 @@ jobs: cibw-test-extras: "test" gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - cibw-test-command: "find / -type f -name 'libcuda.so*' && pytest -v ./python/cudf/cudf/tests" + cibw-test-command: "set -x; ldconfig -p && find / -type f -name 'libcuda.so*' && LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64:/usr/lib:/usr/lib64 pytest -v ./python/cudf/cudf/tests" 
gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: @@ -45,6 +45,6 @@ jobs: python-package-cuda-suffix: "-cu11" gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" - cibw-test-command: "find / -type f -name 'libcuda.so*' && pytest -v ./python/dask_cudf/dask_cudf/tests" + cibw-test-command: "set -x; ldconfig -p && find / -type f -name 'libcuda.so*' && LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64:/usr/lib:/usr/lib64 pytest -v ./python/dask_cudf/dask_cudf/tests" gpu-smoketest: "import dask_cudf; print(dask_cudf)" secrets: inherit From cb805f6f35b06f616c35e859975240487d1f7cd9 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 01:11:05 -0700 Subject: [PATCH 037/142] apt install protobuf-compiler because 3.8 is doing something odd --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index cd699b4c8c2..b1ee728fba7 100644 --- a/.github/workflows/wheels.yml +++ 
b/.github/workflows/wheels.yml @@ -27,7 +27,7 @@ jobs: package-name: cudf package-dir: python/cudf python-version: "3.8 3.9" - cibw-before-all: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all: "apt update && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" cibw-test-extras: "test" From 2e02df16ef434d502b3ba513fdff2186a319a379 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 01:32:10 -0700 Subject: [PATCH 038/142] do protoc in CMake --- python/cudf/CMakeLists.txt | 6 +++ .../cudf/cmake/Modules/ProtobufHelpers.cmake | 45 +++++++++++++++++++ python/cudf/setup.py | 41 ----------------- 3 files changed, 51 insertions(+), 41 deletions(-) create mode 100644 python/cudf/cmake/Modules/ProtobufHelpers.cmake diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index e0c2b582732..3ee7753ab34 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -102,3 +102,9 @@ include(rapids-cython) rapids_cython_init() add_subdirectory(cudf/_lib) + +include(cmake/Modules/ProtobufHelpers.cmake) + +codegen_protoc( + cudf/utils/metadata/orc_column_statistics.proto +) diff --git a/python/cudf/cmake/Modules/ProtobufHelpers.cmake b/python/cudf/cmake/Modules/ProtobufHelpers.cmake new file mode 100644 index 00000000000..bdf9157d133 --- /dev/null +++ b/python/cudf/cmake/Modules/ProtobufHelpers.cmake @@ -0,0 +1,45 @@ +#============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= +include_guard(GLOBAL) + +function(codegen_protoc) + if(DEFINED ENV{PROTOC}) + set(protoc_COMMAND $ENV{PROTOC}) + else() + find_program(protoc_COMMAND protoc REQUIRED) + endif() + + foreach(_proto_path IN LISTS ARGV) + string(REPLACE "\.proto" "_pb2\.py" pb2_py_path "${_proto_path}") + set(pb2_py_path "${CMAKE_CURRENT_SOURCE_DIR}/${pb2_py_path}") + execute_process( + COMMAND ${protoc_COMMAND} --python_out=. "${_proto_path}" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ECHO_ERROR_VARIABLE + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) + file(READ "${pb2_py_path}" pb2_py) + file(WRITE "${pb2_py_path}" [=[ +# flake8: noqa +# fmt: off +]=]) + file(APPEND "${pb2_py_path}" "${pb2_py}") + file(APPEND "${pb2_py_path}" [=[ +# fmt: on +]=]) + endforeach() +endfunction() diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 14b238987e6..71f43cd7175 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -90,48 +90,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): ) -class build_ext_and_proto(build_ext): - def run(self): - # Get protoc - protoc = None - if "PROTOC" in os.environ and os.path.exists(os.environ["PROTOC"]): - protoc = os.environ["PROTOC"] - else: - protoc = find_executable("protoc") - if protoc is None: - sys.stderr.write("protoc not found") - sys.exit(1) - - # Build .proto file - for source in ["cudf/utils/metadata/orc_column_statistics.proto"]: - output = source.replace(".proto", "_pb2.py") - - if not os.path.exists(output) 
or ( - os.path.getmtime(source) > os.path.getmtime(output) - ): - with open(output, "a") as src: - src.write("# flake8: noqa" + os.linesep) - src.write("# fmt: off" + os.linesep) - subprocess.check_call([protoc, "--python_out=.", source]) - with open(output, "r+") as src: - new_src_content = ( - "# flake8: noqa" - + os.linesep - + "# fmt: off" - + os.linesep - + src.read() - + "# fmt: on" - + os.linesep - ) - src.seek(0) - src.write(new_src_content) - - # Run original Cython build_ext command - super().run() - - cmdclass = versioneer.get_cmdclass() -cmdclass["build_ext"] = build_ext_and_proto cmake_args=[] From d32b4f9157bece5d301a4a010f301283d2f4b349 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 03:05:41 -0700 Subject: [PATCH 039/142] put /usr/lib/-linux-gnu in front of LD_LIBRARY_PATH in tests --- .github/workflows/wheels.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b1ee728fba7..89499ace49c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -29,11 +29,12 @@ jobs: python-version: "3.8 3.9" cibw-before-all: "apt update && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" cibw-test-extras: "test" gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - cibw-test-command: "set -x; ldconfig -p && find / -type f -name 'libcuda.so*' && 
LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64:/usr/lib:/usr/lib64 pytest -v ./python/cudf/cudf/tests" + cibw-test-command: "LD_LIBRARY_PATH\"=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH\" pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: @@ -45,6 +46,6 @@ jobs: python-package-cuda-suffix: "-cu11" gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" - cibw-test-command: "set -x; ldconfig -p && find / -type f -name 'libcuda.so*' && LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64:/usr/lib:/usr/lib64 pytest -v ./python/dask_cudf/dask_cudf/tests" + cibw-test-command: "LD_LIBRARY_PATH\"=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH\" pytest -v ./python/dask_cudf/dask_cudf/tests" gpu-smoketest: "import dask_cudf; print(dask_cudf)" secrets: inherit From 4e002b76908f2e2254b4b4ffec7e389da4223d19 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 03:41:22 -0700 Subject: [PATCH 
040/142] fix typo --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 89499ace49c..be92f1730dc 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -34,7 +34,7 @@ jobs: cibw-test-extras: "test" gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - cibw-test-command: "LD_LIBRARY_PATH\"=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH\" pytest -v ./python/cudf/cudf/tests" + cibw-test-command: "LD_LIBRARY_PATH=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: @@ -46,6 +46,6 @@ jobs: python-package-cuda-suffix: "-cu11" gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url 
https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" - cibw-test-command: "LD_LIBRARY_PATH\"=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH\" pytest -v ./python/dask_cudf/dask_cudf/tests" + cibw-test-command: "LD_LIBRARY_PATH=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH pytest -v ./python/dask_cudf/dask_cudf/tests" gpu-smoketest: "import dask_cudf; print(dask_cudf)" secrets: inherit From 8475336a528256071accf9e9b6f2177b9ea7695d Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 22:58:15 -0700 Subject: [PATCH 041/142] pin to numpy<1.23 --- python/cudf/pyproject.toml | 2 +- python/cudf/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index b2e3a69c6c5..912c67adfc7 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -9,7 +9,7 @@ requires = [ "scikit-build>=0.13.1", "cmake>=3.23.1", "ninja", - "numpy", + "numpy<1.23", "pyarrow==9.0.0", "protoc-wheel" ] diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 71f43cd7175..d22dc02498d 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -18,7 +18,7 @@ "cuda-python>=11.5,<11.7.1", "fsspec>=0.6.0", "numba>=0.54", - "numpy", + "numpy<1.23", "nvtx>=0.2.1", "packaging", "pandas>=1.0,<1.6.0dev0", From ecf6e65a14c8a1ab432c30f021cd9d24a2025378 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 15 Sep 2022 23:08:01 -0700 Subject: [PATCH 042/142] pin mimesis<4.1 and install tzdata --- python/cudf/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index d22dc02498d..7e164e8963e 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -33,12 +33,13 @@ "pytest-benchmark", "pytest-xdist", "hypothesis", - "mimesis", + "mimesis<4.1", "fastavro>=0.22.9", "python-snappy>=0.6.0", "pyorc", "msgpack", "transformers<=4.10.3", + "tzdata", ] } From adf226bfc95c40aeecdf0c22f11fe4c73a9f5e40 Mon Sep 17 00:00:00 2001 From: 
ptaylor Date: Fri, 16 Sep 2022 01:35:57 -0700 Subject: [PATCH 043/142] formatting --- python/cudf/setup.py | 81 +++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 49 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 7e164e8963e..e952b5d92f1 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -13,36 +13,6 @@ from skbuild import setup from skbuild.command.build_ext import build_ext -install_requires = [ - "cachetools", - "cuda-python>=11.5,<11.7.1", - "fsspec>=0.6.0", - "numba>=0.54", - "numpy<1.23", - "nvtx>=0.2.1", - "packaging", - "pandas>=1.0,<1.6.0dev0", - "protobuf>=3.20.1,<3.21.0a0", - "typing_extensions", - "pyarrow==9.0.0", -] - -extras_require = { - "test": [ - "pytest", - "pytest-benchmark", - "pytest-xdist", - "hypothesis", - "mimesis<4.1", - "fastavro>=0.22.9", - "python-snappy>=0.6.0", - "pyorc", - "msgpack", - "transformers<=4.10.3", - "tzdata", - ] -} - def get_cuda_version_from_header(cuda_include_dir, delimeter=""): @@ -81,16 +51,6 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): cuda_include_dir = os.path.join(CUDA_HOME, "include") -myplat = platform.machine() - -if myplat == 'x86_64': - install_requires.append( - "cupy-cuda" - + get_cuda_version_from_header(cuda_include_dir) - + ">=9.5.0,<11.0.0a0" - ) - - cmdclass = versioneer.get_cmdclass() cmake_args=[] @@ -102,9 +62,6 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): f"-DCUDF_PYARROW_WHEEL_DIR={pa.__path__[0]}", ] -def exclude_libcxx_symlink(cmake_manifest): - return list(filter(lambda name: not ('include/rapids/libcxx/include' in name), cmake_manifest)) - setup( name="cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), version=versioneer.get_version(), @@ -121,20 +78,46 @@ def exclude_libcxx_symlink(cmake_manifest): "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", ], + cmdclass=cmdclass, cmake_args=cmake_args, - 
cmake_process_manifest_hook=exclude_libcxx_symlink, - packages=find_packages(include=["cudf", "cudf.*"]), include_package_data=True, + packages=find_packages(include=["cudf", "cudf.*"]), package_data={ key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, setup_requires=[ f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", ], - install_requires=install_requires + [ + install_requires=[ + "cachetools", + "cuda-python>=11.5,<11.7.1", + "fsspec>=0.6.0", + "numba>=0.54", + "numpy<1.23", + "nvtx>=0.2.1", + "packaging", + "pandas>=1.0,<1.5.0dev0", + "protobuf>=3.20.1,<3.21.0a0", + "typing_extensions", + "pyarrow==9.0.0", f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - ], - extras_require=extras_require, - cmdclass=cmdclass, + ] + ([ + f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<12.0.0a0" + ] if platform.machine() is "x86_64" else []), + extras_require={ + "test": [ + "pytest", + "pytest-benchmark", + "pytest-xdist", + "hypothesis", + "mimesis<4.1", + "fastavro>=0.22.9", + "python-snappy>=0.6.0", + "pyorc", + "msgpack", + "transformers<=4.10.3", + "tzdata", + ] + }, zip_safe=False, ) From 88a91ae223c710bf4a108092d468e903a55ba8da Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 16 Sep 2022 03:23:11 -0700 Subject: [PATCH 044/142] define ld_library_path for the dask_cudf gpu smoketest --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index be92f1730dc..10352b11ac0 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -44,7 +44,7 @@ jobs: package-name: dask_cudf package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" - gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before: "export LD_LIBRARY_PATH=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH && pip install rmm-cu11 cudf-cu11 --index-url 
https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" cibw-test-command: "LD_LIBRARY_PATH=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH pytest -v ./python/dask_cudf/dask_cudf/tests" gpu-smoketest: "import dask_cudf; print(dask_cudf)" From b74adbd2e8c5c78b1a46c9ca0d65c496549d94cb Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 16 Sep 2022 06:21:47 -0700 Subject: [PATCH 045/142] do syntax differently --- python/cudf/setup.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index e952b5d92f1..ad9e6b70026 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -62,6 +62,13 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): f"-DCUDF_PYARROW_WHEEL_DIR={pa.__path__[0]}", ] +cupy_requirement = [] + +if platform.machine() == "x86_64": + cupy_requirement = [ + f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<12.0.0a0" + ] + setup( name="cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), version=versioneer.get_version(), @@ -101,9 +108,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): "typing_extensions", "pyarrow==9.0.0", f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - ] + ([ - f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<12.0.0a0" - ] if platform.machine() is "x86_64" else []), + ] + cupy_requirement, extras_require={ "test": [ "pytest", From 1b42a4d988280ec44773403546b177dfe6bb19d8 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 19 Sep 2022 17:15:49 -0400 Subject: [PATCH 046/142] Try to build cuDF with manylinux_2_27 Also fixed LD_LIBRARY_PATH in manylinux containers --- .github/workflows/wheels.yml | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 10352b11ac0..b4bcb64b52c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,23 +6,8 
@@ on: - 'pull-request/[0-9]+' jobs: - # libcudf-wheel: - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions - # with: - # package-name: libcudf - # package-dir: cpp - # python-version: "3.8" - # cibw-before-all: "apt-get install -y libzstd-dev liblz4-dev libboost-regex-dev libboost-system-dev libboost-filesystem-dev libsnappy-dev libprotobuf-dev libprotoc-dev protobuf-compiler" - # cibw-before-build: "pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - # skbuild-configure-options: '-DBUILD_TESTS=ON -DBUILD_BENCHMARKS=OFF -DCUDF_ENABLE_ARROW_S3=OFF -DCUDF_ENABLE_ARROW_ORC=ON -DCUDF_ENABLE_ARROW_PYTHON=ON -DCUDF_ENABLE_ARROW_PARQUET=ON' - # auditwheel-skip-repair: "true" - # gpu-smoketest-before-amd64: "apt-get install -y libprotobuf-dev" - # gpu-smoketest-before-arm64: "apt-get install -y libprotobuf-dev" - # wheel-pattern-override: "py3-none-linux" - # cibw-test-command: "for gt in _venv_placeholder/bin/gtests/libcudf*/*; do ${gt}; done" - # secrets: inherit cudf-wheel: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-ptaylor + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 with: package-name: cudf package-dir: python/cudf @@ -31,21 +16,23 @@ jobs: cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" + manylinux-container: "manylinux_2_27" + manylinux-test-container: "manylinux_2_31" cibw-test-extras: "test" gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install 
cupy-cuda11x -f https://pip.cupy.dev/aarch64" - cibw-test-command: "LD_LIBRARY_PATH=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH pytest -v ./python/cudf/cudf/tests" + cibw-test-command: "pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: needs: cudf-wheel - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-ptaylor + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 with: package-name: dask_cudf package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" - gpu-smoketest-before: "export LD_LIBRARY_PATH=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH && pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" cibw-test-extras: "test" - cibw-test-command: "LD_LIBRARY_PATH=/usr/lib/$(uname -p)-linux-gnu:$LD_LIBRARY_PATH pytest -v ./python/dask_cudf/dask_cudf/tests" + cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" gpu-smoketest: "import 
dask_cudf; print(dask_cudf)" secrets: inherit From 7ddc54cb1b5e8e2d403466a01e20da921995c15e Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 20 Sep 2022 12:36:20 -0400 Subject: [PATCH 047/142] Use different manylinux for x86, aarch64 --- .github/workflows/wheels.yml | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b4bcb64b52c..616a7060fb7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,26 +6,42 @@ on: - 'pull-request/[0-9]+' jobs: - cudf-wheel: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 + cudf-wheel-x86_64: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-amd64.yml@feat/wheel-ci-actions-2 with: package-name: cudf package-dir: python/cudf python-version: "3.8 3.9" - cibw-before-all: "apt update && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" - manylinux-container: "manylinux_2_27" + manylinux-container: "manylinux2014" + manylinux-test-container: "manylinux_2_27" + cibw-test-extras: "test" + gpu-smoketest-before: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + cibw-test-command: "pytest -v ./python/cudf/cudf/tests" + gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = 
requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + secrets: inherit + cudf-wheel-aarch64: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-arm64.yml@feat/wheel-ci-actions-2 + with: + package-name: cudf + package-dir: python/cudf + python-version: "3.8 3.9" + cibw-before-all: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + skbuild-build-options: "-v" + skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" + manylinux-container: "manylinux_2_31" manylinux-test-container: "manylinux_2_31" cibw-test-extras: "test" - gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + gpu-smoketest-before: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f 
https://pip.cupy.dev/aarch64" cibw-test-command: "pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: - needs: cudf-wheel + needs: [cudf-wheel-x86_64, cudf-wheel-aarch64] uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 with: package-name: dask_cudf From 0273d6350f50101ab384c9ff32c4126803ad6e21 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 21 Sep 2022 12:47:27 -0400 Subject: [PATCH 048/142] Upgrade pandas to 1.5 in setup.py --- python/cudf/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index ad9e6b70026..706ad53d66d 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -103,7 +103,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): "numpy<1.23", "nvtx>=0.2.1", "packaging", - "pandas>=1.0,<1.5.0dev0", + "pandas>=1.0,<1.6.0dev0", "protobuf>=3.20.1,<3.21.0a0", "typing_extensions", "pyarrow==9.0.0", From b9bd5bf842be185d61bdcfb425f77238671cca0f Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 21 Sep 2022 
14:06:45 -0400 Subject: [PATCH 049/142] Switch to unified workflow for both arches --- .github/workflows/wheels.yml | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 616a7060fb7..dc4ed291128 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,42 +6,27 @@ on: - 'pull-request/[0-9]+' jobs: - cudf-wheel-x86_64: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-amd64.yml@feat/wheel-ci-actions-2 + cudf-wheels: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 with: package-name: cudf package-dir: python/cudf python-version: "3.8 3.9" - cibw-before-all: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + manylinux-container-amd64: "manylinux2014" + manylinux-container-arm64: "manylinux_2_31" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" - manylinux-container: "manylinux2014" - manylinux-test-container: "manylinux_2_27" cibw-test-extras: "test" - gpu-smoketest-before: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - cibw-test-command: "pytest -v ./python/cudf/cudf/tests" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 
'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - secrets: inherit - cudf-wheel-aarch64: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-arm64.yml@feat/wheel-ci-actions-2 - with: - package-name: cudf - package-dir: python/cudf - python-version: "3.8 3.9" - cibw-before-all: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" - skbuild-build-options: "-v" - skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" - manylinux-container: "manylinux_2_31" - manylinux-test-container: "manylinux_2_31" - cibw-test-extras: "test" - gpu-smoketest-before: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + gpu-smoketest-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 
--index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" cibw-test-command: "pytest -v ./python/cudf/cudf/tests" gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: - needs: [cudf-wheel-x86_64, cudf-wheel-aarch64] + needs: cudf-wheels uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 with: package-name: dask_cudf From 508b9e496fd4dc0169e994a42f79dfea7da369c9 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 21 Sep 2022 14:51:17 -0400 Subject: [PATCH 050/142] Can only test 2_31 aarch64 wheel on 2_31 --- .github/workflows/wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index dc4ed291128..e76c88ecc72 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -16,6 +16,7 @@ jobs: cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" manylinux-container-amd64: "manylinux2014" manylinux-container-arm64: 
"manylinux_2_31" + manylinux-test-container-arm64: "manylinux_2_31" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" From 39a6054cfbf07bda2af3ae700b18142726e53d93 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 21 Sep 2022 21:04:04 -0400 Subject: [PATCH 051/142] Switch to new parameter names --- .github/workflows/wheels.yml | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e76c88ecc72..b3478b8d61b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -12,19 +12,27 @@ jobs: package-name: cudf package-dir: python/cudf python-version: "3.8 3.9" + + manylinux-container-amd64: "rapidsai/manylinux2014" + manylinux-container-arm64: "rapidsai/manylinux_2_31" + + auditwheel-repair-command-amd64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_17_x86_64 {wheel}" + auditwheel-repair-command-arm64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_31_aarch64 {wheel}" + cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - manylinux-container-amd64: "manylinux2014" - manylinux-container-arm64: "manylinux_2_31" - manylinux-test-container-arm64: "manylinux_2_31" cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" - cibw-test-extras: "test" - gpu-smoketest-before-amd64: "pip 
install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - gpu-smoketest-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - cibw-test-command: "pytest -v ./python/cudf/cudf/tests" - gpu-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + + test-container-amd64: "rapidsai/manylinux_2_27" # test cuDF x86_64 on our 18.04 container + test-container-arm64: "rapidsai/manylinux_2_31" + test-extras: "test" + test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "pytest -v ./python/cudf/cudf/tests" + test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = 
requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: needs: cudf-wheels @@ -33,8 +41,8 @@ jobs: package-name: dask_cudf package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" - gpu-smoketest-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - cibw-test-extras: "test" - cibw-test-command: "pytest -v ./python/dask_cudf/dask_cudf/tests" - gpu-smoketest: "import dask_cudf; print(dask_cudf)" + test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-extras: "test" + test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" + test-smoketest: "import dask_cudf; print(dask_cudf)" secrets: inherit From a28293ee4b9fdf2a8895c116bff2acde3e5776ff Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Fri, 23 Sep 2022 13:57:05 -0400 Subject: [PATCH 052/142] Add platform specifier for CuPy dependency --- python/cudf/setup.py | 10 +---- python/dask_cudf/setup.py | 79 +++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 52 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 706ad53d66d..c890e2b12bd 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -5,7 +5,6 @@ import shutil import subprocess import sys -import platform from 
distutils.spawn import find_executable import versioneer @@ -62,12 +61,6 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): f"-DCUDF_PYARROW_WHEEL_DIR={pa.__path__[0]}", ] -cupy_requirement = [] - -if platform.machine() == "x86_64": - cupy_requirement = [ - f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<12.0.0a0" - ] setup( name="cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), @@ -108,7 +101,8 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): "typing_extensions", "pyarrow==9.0.0", f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - ] + cupy_requirement, + f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<12.0.0a0; platform_machine=='x86_64'", + ], extras_require={ "test": [ "pytest", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index ed2866da8ef..3653a8a7c3e 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -3,30 +3,27 @@ import os import re import shutil -import platform import versioneer from setuptools import find_packages, setup -install_requires = [ - "dask>=2022.7.1", - "distributed>=2022.7.1", - "fsspec>=0.6.0", - "numpy", - "pandas>=1.0,<1.6.0dev0", - f"cudf{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", -] -extras_require = { - "test": [ - "numpy", - "pandas>=1.0,<1.6.0dev0", - "pytest", - "numba>=0.54", - "dask>=2021.09.1", - "distributed>=2021.09.1", - ] -} +CUDA_HOME = os.environ.get("CUDA_HOME", False) +if not CUDA_HOME: + path_to_cuda_gdb = shutil.which("cuda-gdb") + if path_to_cuda_gdb is None: + raise OSError( + "Could not locate CUDA. " + "Please set the environment variable " + "CUDA_HOME to the path to the CUDA installation " + "and try again." 
+ ) + CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb)) + +if not os.path.isdir(CUDA_HOME): + raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") + +cuda_include_dir = os.path.join(CUDA_HOME, "include") def get_cuda_version_from_header(cuda_include_dir, delimeter=""): @@ -49,32 +46,26 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): ) -CUDA_HOME = os.environ.get("CUDA_HOME", False) -if not CUDA_HOME: - path_to_cuda_gdb = shutil.which("cuda-gdb") - if path_to_cuda_gdb is None: - raise OSError( - "Could not locate CUDA. " - "Please set the environment variable " - "CUDA_HOME to the path to the CUDA installation " - "and try again." - ) - CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb)) - -if not os.path.isdir(CUDA_HOME): - raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") - -cuda_include_dir = os.path.join(CUDA_HOME, "include") - -myplat = platform.machine() - -if myplat == 'x86_64': - install_requires.append( - "cupy-cuda" - + get_cuda_version_from_header(cuda_include_dir) - + ">=9.5.0,<11.0.0a0" - ) +install_requires = [ + "dask>=2022.7.1", + "distributed>=2022.7.1", + "fsspec>=0.6.0", + "numpy", + "pandas>=1.0,<1.6.0dev0", + f"cudf{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<11.0.0a0; platform_machine=='x86_64'", +] +extras_require = { + "test": [ + "numpy", + "pandas>=1.0,<1.6.0dev0", + "pytest", + "numba>=0.54", + "dask>=2021.09.1", + "distributed>=2021.09.1", + ] +} setup( name="dask-cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), From 35a43151a544df0955737b3c06234ac9918791e3 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 27 Sep 2022 14:34:16 -0400 Subject: [PATCH 053/142] rapids-cmake PR #276 fixes this --- python/cudf/cudf/_lib/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/io/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 2 +- 
python/cudf/cudf/_lib/strings/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/strings/convert/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/strings/split/CMakeLists.txt | 2 +- python/strings_udf/strings_udf/_lib/CMakeLists.txt | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index e0f837a2da1..5a63edd13ad 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -70,7 +70,7 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() -foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN") endforeach() diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 81ac8fc02d8..15690afb083 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -25,6 +25,6 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() -foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index 7a9abcc029c..b79c559ba30 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -22,6 +22,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ) -foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES 
INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index c5430d87d73..3c3a2c3e41c 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -41,7 +41,7 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index e711bbc08cb..056be67664d 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -23,6 +23,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index e89f3104eee..60278977c85 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -21,6 +21,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../") endforeach() diff --git a/python/strings_udf/strings_udf/_lib/CMakeLists.txt b/python/strings_udf/strings_udf/_lib/CMakeLists.txt index 
91069a43891..8481b02377d 100644 --- a/python/strings_udf/strings_udf/_lib/CMakeLists.txt +++ b/python/strings_udf/strings_udf/_lib/CMakeLists.txt @@ -20,6 +20,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ) -foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp") endforeach() From 9bb11094025e6159fc2782cd5f50be948364587d Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 27 Sep 2022 14:57:26 -0400 Subject: [PATCH 054/142] Typo: RAPID_ -> RAPIDS_ --- python/cudf/cudf/_lib/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/io/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/nvtext/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/strings/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/strings/convert/CMakeLists.txt | 2 +- python/cudf/cudf/_lib/strings/split/CMakeLists.txt | 2 +- python/strings_udf/strings_udf/_lib/CMakeLists.txt | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 5a63edd13ad..5ca597d6cf6 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -70,7 +70,7 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() -foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN") endforeach() diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 15690afb083..c231b141bf6 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -25,6 +25,6 @@ foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE 
"${Python_NumPy_INCLUDE_DIRS}") endforeach() -foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt index b79c559ba30..8321bc15d15 100644 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt @@ -22,6 +22,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ) -foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt index 3c3a2c3e41c..9a7f1b93d8e 100644 --- a/python/cudf/cudf/_lib/strings/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt @@ -41,7 +41,7 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt index 056be67664d..ba2990701d8 100644 --- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt @@ -23,6 +23,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES 
INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../") endforeach() diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt index 60278977c85..052f6dc17c0 100644 --- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt +++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt @@ -21,6 +21,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ) -foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../") endforeach() diff --git a/python/strings_udf/strings_udf/_lib/CMakeLists.txt b/python/strings_udf/strings_udf/_lib/CMakeLists.txt index 8481b02377d..042ea98a55d 100644 --- a/python/strings_udf/strings_udf/_lib/CMakeLists.txt +++ b/python/strings_udf/strings_udf/_lib/CMakeLists.txt @@ -20,6 +20,6 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ) -foreach(cython_module IN LISTS RAPID_CYTHON_CREATED_TARGETS) +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp") endforeach() From 55eb9e30c7f2709877709e9d4087ce21a354c7ec Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 28 Sep 2022 01:01:15 -0400 Subject: [PATCH 055/142] Override versioneer with env var --- .github/workflows/wheels.yml | 2 +- python/cudf/setup.py | 2 +- python/dask_cudf/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b3478b8d61b..60f66dca956 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -21,7 +21,7 @@ jobs: cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "apt update -y && apt 
install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple PYTHON_PACKAGE_VERSIONEER_OVERRIDE=22.10.00a" skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" diff --git a/python/cudf/setup.py b/python/cudf/setup.py index c890e2b12bd..e1fe8666427 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -64,7 +64,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): setup( name="cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), - version=versioneer.get_version(), + version=os.getenv("PYTHON_PACKAGE_VERSIONEER_OVERRIDE", default=versioneer.get_version()), description="cuDF - GPU Dataframe", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 3653a8a7c3e..1ef41007c10 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -69,7 +69,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): setup( name="dask-cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), - version=versioneer.get_version(), + version=os.getenv("PYTHON_PACKAGE_VERSIONEER_OVERRIDE", default=versioneer.get_version()), description="Utilities for Dask and cuDF interactions", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", From 4338b6146b436607b9d0b08f0c3ca3d7bc0bf066 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 28 Sep 2022 07:48:23 -0400 Subject: [PATCH 056/142] Also override version of dask-cudf --- .github/workflows/wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 60f66dca956..54eea7d21c3 
100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -41,6 +41,8 @@ jobs: package-name: dask_cudf package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" + python-package-versioneer-override: "22.10.00a" + test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" From a95bfa5c9d348c8a58211efd7c01abdabfda3f2d Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 28 Sep 2022 17:21:51 -0400 Subject: [PATCH 057/142] Testing build-tag incrementer --- .github/workflows/wheels.yml | 2 +- python/cudf/setup.py | 4 ++-- python/dask_cudf/setup.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 54eea7d21c3..b1ddd180c6d 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -21,7 +21,7 @@ jobs: cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple PYTHON_PACKAGE_VERSIONEER_OVERRIDE=22.10.00a" + cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE=22.10.00a" skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" diff --git a/python/cudf/setup.py b/python/cudf/setup.py index e1fe8666427..5c235931382 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -63,8 +63,8 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): setup( - name="cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", 
default=""), - version=os.getenv("PYTHON_PACKAGE_VERSIONEER_OVERRIDE", default=versioneer.get_version()), + name="cudf"+os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default=""), + version=os.getenv("RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version()), description="cuDF - GPU Dataframe", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 1ef41007c10..7c9a3c3e5e4 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -52,7 +52,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", - f"cudf{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + f"cudf{os.getenv('RAPIDS_PY_WHEEL_CUDA_SUFFIX', default='')}", f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<11.0.0a0; platform_machine=='x86_64'", ] @@ -68,8 +68,8 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): } setup( - name="dask-cudf"+os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default=""), - version=os.getenv("PYTHON_PACKAGE_VERSIONEER_OVERRIDE", default=versioneer.get_version()), + name="dask-cudf"+os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default=""), + version=os.getenv("RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version()), description="Utilities for Dask and cuDF interactions", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", From b0256c09aa9a712640be0fbd669e48bd3c65098a Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 29 Sep 2022 08:54:28 -0400 Subject: [PATCH 058/142] Testing build-tag incrementer --- .github/workflows/wheels.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b1ddd180c6d..616f9360daa 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -12,6 +12,7 @@ jobs: package-name: cudf package-dir: 
python/cudf python-version: "3.8 3.9" + python-package-versioneer-override: "22.10.00a" manylinux-container-amd64: "rapidsai/manylinux2014" manylinux-container-arm64: "rapidsai/manylinux_2_31" @@ -21,7 +22,7 @@ jobs: cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE=22.10.00a" + cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" From c08095b7cdbfa45d0a1b15a03df5c4c3520bf230 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 29 Sep 2022 12:14:32 -0400 Subject: [PATCH 059/142] Manually set -0- build tag --- .github/workflows/wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 616f9360daa..4fbb781a08b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -13,6 +13,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" + python-package-build-tag: "0" manylinux-container-amd64: "rapidsai/manylinux2014" manylinux-container-arm64: "rapidsai/manylinux_2_31" @@ -43,6 +44,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: "22.10.00a" + python-package-build-tag: "0" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" From c450202a4930fd2e8e5f0ef6195b3b903285b89c Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 3 Oct 2022 
12:01:43 -0400 Subject: [PATCH 060/142] Bump build-tag and add LICENSE files --- .github/workflows/wheels.yml | 4 +- python/cudf/LICENSE | 201 +++++++++++++++++++++++++++++++++++ python/dask_cudf/LICENSE | 201 +++++++++++++++++++++++++++++++++++ 3 files changed, 404 insertions(+), 2 deletions(-) create mode 100644 python/cudf/LICENSE create mode 100644 python/dask_cudf/LICENSE diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 4fbb781a08b..005cb2fa8fc 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -13,7 +13,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "0" + python-package-build-tag: "1" manylinux-container-amd64: "rapidsai/manylinux2014" manylinux-container-arm64: "rapidsai/manylinux_2_31" @@ -44,7 +44,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "0" + python-package-build-tag: "1" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" diff --git a/python/cudf/LICENSE b/python/cudf/LICENSE new file mode 100644 index 00000000000..18bcb4316e6 --- /dev/null +++ b/python/cudf/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2018 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/python/dask_cudf/LICENSE b/python/dask_cudf/LICENSE new file mode 100644 index 00000000000..18bcb4316e6 --- /dev/null +++ b/python/dask_cudf/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2018 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. From e68fe6d058269a6bd7ea371769e8c24462376fb0 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 3 Oct 2022 14:00:34 -0400 Subject: [PATCH 061/142] Typo in dask-cudf setup.py --- .github/workflows/wheels.yml | 62 ++++++++++++++++++------------------ python/dask_cudf/setup.py | 1 - 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 005cb2fa8fc..8047684c65c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,38 +6,38 @@ on: - 'pull-request/[0-9]+' jobs: - cudf-wheels: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 - with: - package-name: cudf - package-dir: python/cudf - python-version: "3.8 3.9" - python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "1" - - manylinux-container-amd64: "rapidsai/manylinux2014" - manylinux-container-arm64: "rapidsai/manylinux_2_31" - - auditwheel-repair-command-amd64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_17_x86_64 {wheel}" - auditwheel-repair-command-arm64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_31_aarch64 {wheel}" - - cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-all-arm64: "apt update -y && apt install 
-y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" - - skbuild-build-options: "-v" - skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" - - test-container-amd64: "rapidsai/manylinux_2_27" # test cuDF x86_64 on our 18.04 container - test-container-arm64: "rapidsai/manylinux_2_31" - test-extras: "test" - test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - test-unittest: "pytest -v ./python/cudf/cudf/tests" - test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - secrets: inherit + #cudf-wheels: + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 + # with: + # package-name: cudf + # package-dir: python/cudf + # 
python-version: "3.8 3.9" + # python-package-versioneer-override: "22.10.00a" + # python-package-build-tag: "1" + + # manylinux-container-amd64: "rapidsai/manylinux2014" + # manylinux-container-arm64: "rapidsai/manylinux_2_31" + + # auditwheel-repair-command-amd64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_17_x86_64 {wheel}" + # auditwheel-repair-command-arm64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_31_aarch64 {wheel}" + + # cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + # cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + + # skbuild-build-options: "-v" + # skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" + + # test-container-amd64: "rapidsai/manylinux_2_27" # test cuDF x86_64 on our 18.04 container + # test-container-arm64: "rapidsai/manylinux_2_31" + # test-extras: "test" + # test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + # test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + # test-unittest: "pytest -v ./python/cudf/cudf/tests" + # test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); 
print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" + # secrets: inherit dask_cudf-wheel: - needs: cudf-wheels + #needs: cudf-wheels uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 with: package-name: dask_cudf diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index ae4d27e6904..7c9a3c3e5e4 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -8,7 +8,6 @@ from setuptools import find_packages, setup -<<<<<<< HEAD CUDA_HOME = os.environ.get("CUDA_HOME", False) if not CUDA_HOME: path_to_cuda_gdb = shutil.which("cuda-gdb") From 10cef133b53c98136b6b97b0a2cb60944b4d5f7c Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 4 Oct 2022 00:58:02 -0400 Subject: [PATCH 062/142] Add ptxcompiler and cubinlinker to cuDF install_requires --- .github/workflows/wheels.yml | 80 ++++++++++++++++++------------------ python/cudf/setup.py | 2 + 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8047684c65c..90f1c2e85ae 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -6,48 +6,48 @@ on: - 'pull-request/[0-9]+' jobs: - #cudf-wheels: - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 - # with: - # package-name: cudf - # package-dir: python/cudf - # python-version: "3.8 3.9" - # python-package-versioneer-override: "22.10.00a" - # python-package-build-tag: "1" - - # manylinux-container-amd64: 
"rapidsai/manylinux2014" - # manylinux-container-arm64: "rapidsai/manylinux_2_31" - - # auditwheel-repair-command-amd64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_17_x86_64 {wheel}" - # auditwheel-repair-command-arm64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_31_aarch64 {wheel}" - - # cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - # cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" - - # skbuild-build-options: "-v" - # skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" - - # test-container-amd64: "rapidsai/manylinux_2_27" # test cuDF x86_64 on our 18.04 container - # test-container-arm64: "rapidsai/manylinux_2_31" - # test-extras: "test" - # test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - # test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - # test-unittest: "pytest -v ./python/cudf/cudf/tests" - # test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = 
pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" - # secrets: inherit - dask_cudf-wheel: - #needs: cudf-wheels - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 + cudf-wheels: + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 with: - package-name: dask_cudf - package-dir: python/dask_cudf - python-package-cuda-suffix: "-cu11" + package-name: cudf + package-dir: python/cudf + python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "1" + python-package-build-tag: "2" - test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + manylinux-container-amd64: "rapidsai/manylinux2014" + manylinux-container-arm64: "rapidsai/manylinux_2_31" + + auditwheel-repair-command-amd64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_17_x86_64 {wheel}" + auditwheel-repair-command-arm64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_31_aarch64 {wheel}" + + cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" + + skbuild-build-options: "-v" + skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" + + test-container-amd64: "rapidsai/manylinux_2_27" # test cuDF 
x86_64 on our 18.04 container + test-container-arm64: "rapidsai/manylinux_2_31" test-extras: "test" - test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" - test-smoketest: "import dask_cudf; print(dask_cudf)" + test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "pytest -v ./python/cudf/cudf/tests" + test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit + #dask_cudf-wheel: + # needs: cudf-wheels + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 + # with: + # package-name: dask_cudf + # package-dir: python/dask_cudf + # python-package-cuda-suffix: "-cu11" + # python-package-versioneer-override: "22.10.00a" + # python-package-build-tag: "1" + + # test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + # test-extras: "test" + # test-unittest: 
"pytest -v ./python/dask_cudf/dask_cudf/tests" + # test-smoketest: "import dask_cudf; print(dask_cudf)" + # secrets: inherit diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 5c235931382..3fbec87d9d0 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -101,6 +101,8 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): "typing_extensions", "pyarrow==9.0.0", f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<12.0.0a0; platform_machine=='x86_64'", ], extras_require={ From 572f67afa3a4d151fd09090e319b8be16afe36e9 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 4 Oct 2022 02:21:31 -0400 Subject: [PATCH 063/142] Install ptxcompiler and cubinlinker in pre-test step --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 90f1c2e85ae..8f924c9217b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -31,8 +31,8 @@ jobs: test-container-amd64: "rapidsai/manylinux_2_27" # test cuDF x86_64 on our 18.04 container test-container-arm64: "rapidsai/manylinux_2_31" test-extras: "test" - test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 
cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v ./python/cudf/cudf/tests" test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit From c09012a261ca54a09d675fbeb5a06409b9cafa4a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 10 Oct 2022 09:43:40 -0700 Subject: [PATCH 064/142] Bump build tag. 
--- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8f924c9217b..909389fb35c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -13,7 +13,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "2" + python-package-build-tag: "3" manylinux-container-amd64: "rapidsai/manylinux2014" manylinux-container-arm64: "rapidsai/manylinux_2_31" From d11625ab072b722676c6c5bb9e3e358592f2bf6d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 10 Oct 2022 10:57:00 -0700 Subject: [PATCH 065/142] Add dask_cudf wheels back. --- .github/workflows/wheels.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 909389fb35c..147e4a3189e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -36,18 +36,18 @@ jobs: test-unittest: "pytest -v ./python/cudf/cudf/tests" test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 
'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit - #dask_cudf-wheel: - # needs: cudf-wheels - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 - # with: - # package-name: dask_cudf - # package-dir: python/dask_cudf - # python-package-cuda-suffix: "-cu11" - # python-package-versioneer-override: "22.10.00a" - # python-package-build-tag: "1" - - # test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - # test-extras: "test" - # test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" - # test-smoketest: "import dask_cudf; print(dask_cudf)" - # secrets: inherit + dask_cudf-wheel: + needs: cudf-wheels + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 + with: + package-name: dask_cudf + package-dir: python/dask_cudf + python-package-cuda-suffix: "-cu11" + python-package-versioneer-override: "22.10.00a" + python-package-build-tag: "2" + + test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-extras: "test" + test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" + test-smoketest: "import dask_cudf; print(dask_cudf)" + secrets: inherit From 40ea0e34985c460a13cfcf028564b09157684457 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 10 Oct 2022 10:59:28 -0700 Subject: [PATCH 066/142] Fix indentation. 
--- .github/workflows/wheels.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 147e4a3189e..0178a98440e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -36,18 +36,18 @@ jobs: test-unittest: "pytest -v ./python/cudf/cudf/tests" test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit - dask_cudf-wheel: - needs: cudf-wheels - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 - with: - package-name: dask_cudf - package-dir: python/dask_cudf - python-package-cuda-suffix: "-cu11" - python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "2" - - test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-extras: "test" - test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" - test-smoketest: "import dask_cudf; print(dask_cudf)" - secrets: inherit + dask_cudf-wheel: + needs: cudf-wheels + uses: 
rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 + with: + package-name: dask_cudf + package-dir: python/dask_cudf + python-package-cuda-suffix: "-cu11" + python-package-versioneer-override: "22.10.00a" + python-package-build-tag: "2" + + test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-extras: "test" + test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" + test-smoketest: "import dask_cudf; print(dask_cudf)" + secrets: inherit From 8080fb88bd9847d9ce120ca8cba4733ea9a735fd Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 10 Oct 2022 11:31:59 -0700 Subject: [PATCH 067/142] Add concurrency rules so prior runs are canceled. --- .github/workflows/wheels.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 0178a98440e..cf662aab55d 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -5,6 +5,10 @@ on: branches: - 'pull-request/[0-9]+' +concurrency: + group: "${{ github.workflow }}-${{ github.ref }}" + cancel-in-progress: true + jobs: cudf-wheels: uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 @@ -13,7 +17,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "3" + python-package-build-tag: "4" manylinux-container-amd64: "rapidsai/manylinux2014" manylinux-container-arm64: "rapidsai/manylinux_2_31" @@ -44,7 +48,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "2" + python-package-build-tag: "3" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" From 7e5dcf0e31e81a2b0391d303a79211eac3b73f40 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: 
Tue, 11 Oct 2022 10:53:55 -0700 Subject: [PATCH 068/142] Bump build tag. --- .github/workflows/wheels.yml | 4 ++-- python/cudf/setup.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index cf662aab55d..59e0df9e5cf 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -17,7 +17,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "4" + python-package-build-tag: "5" manylinux-container-amd64: "rapidsai/manylinux2014" manylinux-container-arm64: "rapidsai/manylinux_2_31" @@ -48,7 +48,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "3" + python-package-build-tag: "5" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 3fbec87d9d0..4f6294976c9 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -52,19 +52,22 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): cmdclass = versioneer.get_cmdclass() -cmake_args=[] +cmake_args = [] if os.getenv("CUDF_BUILD_WHEELS", "") != "": import pyarrow as pa - cmake_args=[ + + cmake_args = [ "-DCUDF_BUILD_WHEELS=ON", f"-DCUDF_PYARROW_WHEEL_DIR={pa.__path__[0]}", ] setup( - name="cudf"+os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default=""), - version=os.getenv("RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version()), + name="cudf" + os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default=""), + version=os.getenv( + "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version() + ), description="cuDF - GPU Dataframe", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", @@ -93,7 +96,7 @@ def get_cuda_version_from_header(cuda_include_dir, 
delimeter=""): "cuda-python>=11.5,<11.7.1", "fsspec>=0.6.0", "numba>=0.54", - "numpy<1.23", + "numpy", "nvtx>=0.2.1", "packaging", "pandas>=1.0,<1.6.0dev0", From a09f081db70267a6836592a118018852ce80693f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 11 Oct 2022 10:54:55 -0700 Subject: [PATCH 069/142] Fix style. --- python/cudf/setup.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 4f6294976c9..0e521fc9f67 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -3,14 +3,10 @@ import os import re import shutil -import subprocess -import sys -from distutils.spawn import find_executable import versioneer from setuptools import find_packages from skbuild import setup -from skbuild.command.build_ext import build_ext def get_cuda_version_from_header(cuda_include_dir, delimeter=""): @@ -106,7 +102,10 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0,<12.0.0a0; platform_machine=='x86_64'", + ( + f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0" + ",<12.0.0a0; platform_machine=='x86_64'", + ), ], extras_require={ "test": [ From 9cc87e9fdde2db489ed5df1ad8a1b15e7afe4a7a Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 12 Oct 2022 17:44:58 -0400 Subject: [PATCH 070/142] Bump cudf to use new ptxcompiler/cubinlinker --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 59e0df9e5cf..db5a48f4a0a 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -17,7 +17,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" 
python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "5" + python-package-build-tag: "6" manylinux-container-amd64: "rapidsai/manylinux2014" manylinux-container-arm64: "rapidsai/manylinux_2_31" @@ -48,7 +48,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "5" + python-package-build-tag: "6" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" From b645233bc5b5de5047b18dc7bbf4841ea598865d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 12 Oct 2022 17:25:30 -0700 Subject: [PATCH 071/142] Centralized logic in used workflow. --- .github/workflows/wheels.yml | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index db5a48f4a0a..ed4072e2a2f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -19,26 +19,16 @@ jobs: python-package-versioneer-override: "22.10.00a" python-package-build-tag: "6" - manylinux-container-amd64: "rapidsai/manylinux2014" - manylinux-container-arm64: "rapidsai/manylinux_2_31" + cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-environment: "PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" - auditwheel-repair-command-amd64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_17_x86_64 {wheel}" - auditwheel-repair-command-arm64: "auditwheel --verbose repair -w {dest_dir} --plat manylinux_2_31_aarch64 {wheel}" - - cibw-before-all-amd64: "yum update -y && yum install -y protobuf-compiler && tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-all-arm64: "apt update -y && apt install -y protobuf-compiler && tar -xf 
/opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "CUDF_BUILD_WHEELS=1 PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" - - skbuild-build-options: "-v" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" - test-container-amd64: "rapidsai/manylinux_2_27" # test cuDF x86_64 on our 18.04 container - test-container-arm64: "rapidsai/manylinux_2_31" test-extras: "test" test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v ./python/cudf/cudf/tests" - test-smoketest: "import cudf, requests, numpy; from io import StringIO; url = 'https://github.com/plotly/datasets/raw/master/tips.csv'; content = requests.get(url).content.decode('utf-8'); tips_df = cudf.read_csv(StringIO(content)); tips_df['tip_percentage'] = tips_df['tip'] / tips_df['total_bill'] * 100; mean_tips = tips_df.groupby('size').tip_percentage.mean(); print(mean_tips); print(mean_tips.iloc[0]); assert numpy.isclose(mean_tips.iloc[0], 15.622920); import pyarrow as pa; n_legs = pa.array([2, 4, 5, 100]); animals = pa.array(['Flamingo', 'Horse', 'Brittle stars', 'Centipede']); names = ['n_legs', 'animals']; print(animals); foo = pa.table([n_legs, animals], names=names); print(foo); import cudf; df = cudf.DataFrame.from_arrow(foo); print(df); print(df.loc[df['animals'] == 'Centipede']['n_legs']); assert df.loc[df['animals'] == 'Centipede']['n_legs'].iloc[0] == 100" secrets: inherit dask_cudf-wheel: needs: cudf-wheels @@ -53,5 +43,4 @@ jobs: test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: 
"test" test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" - test-smoketest: "import dask_cudf; print(dask_cudf)" secrets: inherit From b2192deb5de082a14ad0cb2fc3639d5c65d0ac51 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 12 Oct 2022 17:39:41 -0700 Subject: [PATCH 072/142] Also remove PIP_INDEX_URL. --- .github/workflows/wheels.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index ed4072e2a2f..28233fbc24f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -21,7 +21,6 @@ jobs: cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "PIP_INDEX_URL=https://pypi.k8s.rapids.ai/simple" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" From e1d12bd1d80e81811adb29fb6a700d9ff8739b96 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 13 Oct 2022 09:03:15 -0400 Subject: [PATCH 073/142] Bump build tag --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 28233fbc24f..6e1a08e8eeb 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -17,7 +17,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "6" + python-package-build-tag: "7" cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" @@ -37,7 +37,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: "22.10.00a" - 
python-package-build-tag: "6" + python-package-build-tag: "7" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" From d659c7759a6b0c28439a801a8ab25f9b463bcfda Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 13 Oct 2022 09:09:26 -0400 Subject: [PATCH 074/142] Add missing CUDF_BUILD_WHEELS=1 option --- .github/workflows/wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6e1a08e8eeb..6aefb7eefc4 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -21,6 +21,7 @@ jobs: cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-environment: "CUDF_BUILD_WHEELS=1" skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" From d83778177701e4d60323e6353f3100f14a05e596 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 13 Oct 2022 09:33:34 -0400 Subject: [PATCH 075/142] Bump build tags for x86_64 runners --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6aefb7eefc4..a6d0f706162 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -17,7 +17,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "7" + python-package-build-tag: "8" cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" @@ -38,7 +38,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: 
"22.10.00a" - python-package-build-tag: "7" + python-package-build-tag: "8" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" From 732503307d1477027c22a953d33d97e5e95dde15 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 13 Oct 2022 10:18:10 -0400 Subject: [PATCH 076/142] Bump build tags for x86_64 runners --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index a6d0f706162..b43f3c05405 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -17,7 +17,7 @@ jobs: package-dir: python/cudf python-version: "3.8 3.9" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "8" + python-package-build-tag: "9" cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" @@ -38,7 +38,7 @@ jobs: package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "8" + python-package-build-tag: "9" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" From 1558403753cb597a4c5732fa5dc533f2c5bd4436 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Mon, 17 Oct 2022 15:02:31 -0400 Subject: [PATCH 077/142] Clean rebuild for stable version 22.10.00 --- .github/workflows/wheels.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b43f3c05405..9082e024a4d 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -15,9 +15,7 @@ jobs: with: package-name: cudf package-dir: python/cudf - python-version: "3.8 3.9" - python-package-versioneer-override: "22.10.00a" - 
python-package-build-tag: "9" + python-package-versioneer-override: "22.10.00" cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" @@ -37,8 +35,7 @@ jobs: package-name: dask_cudf package-dir: python/dask_cudf python-package-cuda-suffix: "-cu11" - python-package-versioneer-override: "22.10.00a" - python-package-build-tag: "9" + python-package-versioneer-override: "22.10.00" test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" From 52b1d2f1766fa38b4a6be93efa021c932f28123b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:01:41 -0800 Subject: [PATCH 078/142] Remove LICENSE files. --- python/cudf/LICENSE | 201 --------------------------------------- python/dask_cudf/LICENSE | 201 --------------------------------------- 2 files changed, 402 deletions(-) delete mode 100644 python/cudf/LICENSE delete mode 100644 python/dask_cudf/LICENSE diff --git a/python/cudf/LICENSE b/python/cudf/LICENSE deleted file mode 100644 index 18bcb4316e6..00000000000 --- a/python/cudf/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright 2018 NVIDIA Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/python/dask_cudf/LICENSE b/python/dask_cudf/LICENSE deleted file mode 100644 index 18bcb4316e6..00000000000 --- a/python/dask_cudf/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright 2018 NVIDIA Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. From 653031cc669af6dbb693ff266b1e4c2ad6c2a05e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:02:00 -0800 Subject: [PATCH 079/142] Symlink license files. --- python/cudf/LICENSE | 1 + 1 file changed, 1 insertion(+) create mode 120000 python/cudf/LICENSE diff --git a/python/cudf/LICENSE b/python/cudf/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/cudf/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file From a15516ddc12eda2b4c1c23d18827fa23b3766455 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:14:56 -0800 Subject: [PATCH 080/142] Rename the C++ option for disabling test utils to be more explicit. 
--- cpp/CMakeLists.txt | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c6f8ae0b4a9..de6a9ae4a95 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -48,7 +48,7 @@ option(BUILD_TESTS "Configure CMake to build tests" ON) option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" OFF) option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) -option(CUDF_BUILD_WHEELS "Whether we're building libcudf to go in a wheel for pypi" OFF) +option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON) option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON) option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF) @@ -95,6 +95,12 @@ message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}" rapids_cmake_build_type("Release") set(CUDF_BUILD_TESTS ${BUILD_TESTS}) set(CUDF_BUILD_BENCHMARKS ${BUILD_BENCHMARKS}) +if(BUILD_TESTS AND NOT CUDF_BUILD_TESTUTIL) + message( + FATAL_ERROR + "Tests cannot be built without building cudf test utils. 
Please set CUDF_BUILD_TESTUTIL=ON or BUILD_TESTS=OFF" + ) +endif() set(CUDF_CXX_FLAGS "") set(CUDF_CUDA_FLAGS "") @@ -134,7 +140,7 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags # find zlib rapids_find_package(ZLIB REQUIRED) -if(NOT CUDF_BUILD_WHEELS) +if(CUDF_BUILD_TESTUTIL) # find Threads (needed by cudftestutil) rapids_find_package( Threads REQUIRED @@ -163,7 +169,7 @@ rapids_cpm_libcudacxx(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-expo # find cuCollections Should come after including thrust and libcudacxx include(cmake/thirdparty/get_cucollections.cmake) # find or install GoogleTest -if(NOT CUDF_BUILD_WHEELS) +if(CUDF_BUILD_TESTUTIL) include(cmake/thirdparty/get_gtest.cmake) endif() # preprocess jitify-able kernels @@ -694,7 +700,7 @@ add_library(cudf::cudf ALIAS cudf) # ################################################################################################## # * build cudftestutil ---------------------------------------------------------------------------- -if(NOT CUDF_BUILD_WHEELS) +if(CUDF_BUILD_TESTUTIL) add_library( cudftestutil STATIC tests/io/metadata_utilities.cpp @@ -790,7 +796,7 @@ install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cud ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) -if(NOT CUDF_BUILD_WHEELS) +if(CUDF_BUILD_TESTUTIL) install( TARGETS cudftestutil DESTINATION ${lib_dir} @@ -911,7 +917,7 @@ rapids_export( FINAL_CODE_BLOCK build_code_string ) -if(NOT CUDF_BUILD_WHEELS) +if(CUDF_BUILD_TESTUTIL) export( EXPORT cudf-testing-exports FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake From a688ede3dfcc35e56ec00e79b23c1c7728e483ab Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:19:52 -0800 Subject: [PATCH 081/142] Revert unnecessary change to arrow files. 
--- cpp/include/cudf/detail/interop.hpp | 2 +- cpp/src/interop/to_arrow.cu | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 510d5cb4dd2..5a5bbe7f683 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -55,7 +55,7 @@ DLManagedTensor* to_dlpack( // Creating arrow as per given type_id and buffer arguments template -inline std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... args) +std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... args) { switch (id) { case type_id::BOOL8: return std::make_shared(std::forward(args)...); diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index d0c5ac36542..fb203e6c3c1 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -393,15 +393,14 @@ std::shared_ptr to_arrow(table_view input, : std::make_shared(c.size()); }); - std::transform(arrays.begin(), - arrays.end(), - metadata.begin(), - std::back_inserter(fields), - [](auto const& array, auto const& meta) { - return std::make_shared(meta.name, array->type()); - }); + std::transform( + arrays.begin(), + arrays.end(), + metadata.begin(), + std::back_inserter(fields), + [](auto const& array, auto const& meta) { return arrow::field(meta.name, array->type()); }); - auto result = arrow::Table::Make(arrow::schema(fields), arrays, input.num_rows()); + auto result = arrow::Table::Make(arrow::schema(fields), arrays); // synchronize the stream because after the return the data may be accessed from the host before // the above `cudaMemcpyAsync` calls have completed their copies (especially if pinned host From 202b5e58924cf3c847e4bb53cff3a614b15b09e3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:19:58 -0800 Subject: [PATCH 082/142] Update version. 
--- python/cudf/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index a563c7d3327..7e98b266f95 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) -set(cudf_version 22.10.00) +set(cudf_version 22.12.00) include(../../fetch_rapids.cmake) From fee22f4ce65a4ab59822f38d95a6c54418fb272c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:22:22 -0800 Subject: [PATCH 083/142] Revert redundant EXCLUDE_FROM_ALL. --- cpp/tests/CMakeLists.txt | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4c7df26e8f8..56e9131e37d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -36,20 +36,11 @@ function(ConfigureTest CMAKE_TEST_NAME) $ ) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) - if(SKBUILD) - install( - TARGETS ${CMAKE_TEST_NAME} - COMPONENT testing - DESTINATION bin/gtests/libcudf - ) - else() - install( - TARGETS ${CMAKE_TEST_NAME} - COMPONENT testing - DESTINATION bin/gtests/libcudf - EXCLUDE_FROM_ALL - ) - endif() + install( + TARGETS ${CMAKE_TEST_NAME} + COMPONENT testing + DESTINATION bin/gtests/libcudf + ) endfunction() # ################################################################################################## From f592969b232258ee217c5e0dacc7f85291dcd555 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:29:55 -0800 Subject: [PATCH 084/142] Clean up some of the Python CMake logic. 
--- python/cudf/CMakeLists.txt | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 7e98b266f95..06cd89d368d 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -35,6 +35,11 @@ option(CUDF_BUILD_WHEELS "Whether we're building a wheel for pypi" OFF) option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF) mark_as_advanced(USE_LIBARROW_FROM_PYARROW) +# Always build wheels against the pyarrow libarrow. +if(CUDF_BUILD_WHEELS) + set(USE_LIBARROW_FROM_PYARROW ON) +endif() + # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) if(USE_LIBARROW_FROM_PYARROW) @@ -71,17 +76,23 @@ if(NOT cudf_FOUND) set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) - # Statically link dependencies if building wheels - set(CPM_DOWNLOAD_rmm ${CUDF_BUILD_WHEELS}) - set(CPM_DOWNLOAD_spdlog ${CUDF_BUILD_WHEELS}) - set(CUDA_STATIC_RUNTIME ${CUDF_BUILD_WHEELS}) - - # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp - # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL - set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ${CUDF_BUILD_WHEELS}) - set(_exclude_from_all "") if(CUDF_BUILD_WHEELS) + # We don't build C++ tests when building wheels, so we can also omit the test util and shrink + # the wheel by avoiding embedding GTest. + set(CUDF_BUILD_TESTUTIL OFF) + + # TODO: Unclear if we should need to require this for rmm and spdlog. 
rmm is already header-only + set(CPM_DOWNLOAD_rmm ON) + set(CPM_DOWNLOAD_spdlog ON) + + # Statically link cudart if building wheels + set(CUDA_STATIC_RUNTIME ON) + + # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp + # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL + set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) + # Don't install the cuDF C++ targets into wheels set(_exclude_from_all EXCLUDE_FROM_ALL) endif() From 48d1b58be42e4278f33538b464fc5a0213fdfa35 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 15:30:48 -0800 Subject: [PATCH 085/142] Add back dropped EXCLUDE_FROM_ALL. --- cpp/tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 56e9131e37d..5ff2e9bf6d6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -40,6 +40,7 @@ function(ConfigureTest CMAKE_TEST_NAME) TARGETS ${CMAKE_TEST_NAME} COMPONENT testing DESTINATION bin/gtests/libcudf + EXCLUDE_FROM_ALL ) endfunction() From 881ca4306f9ae32f2bc1b9d4b20c94156b6ec679 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 17:19:59 -0800 Subject: [PATCH 086/142] Match new library location for arrow. 
--- python/cudf/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 06cd89d368d..937fa850dea 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -103,9 +103,7 @@ if(NOT cudf_FOUND) include(cmake/Modules/WheelHelpers.cmake) get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES) # Ensure all the shared objects we need at runtime are in the wheel - add_target_libs_to_wheel( - LIB_DIR cudf/_lib TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs} - ) + add_target_libs_to_wheel(LIB_DIR cudf TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs}) endif() # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the # library in the cudf directory as a single source of truth and modify the other rpaths From b379d51a4ea08a142f8bfe43bc611b9b096e6ba1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 18:47:00 -0800 Subject: [PATCH 087/142] Update required versions and remove now unnecessary CUDA versioning code since we're using cupy-cuda11x. --- python/cudf/setup.py | 48 +++----------------------------------------- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 1b1f4bc43b8..6951ac4fe5e 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -1,54 +1,15 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. 
import os -import re -import shutil import versioneer from setuptools import find_packages from skbuild import setup -def get_cuda_version_from_header(cuda_include_dir, delimeter=""): - - cuda_version = None - - with open(os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8") as f: - for line in f.readlines(): - if re.search(r"#define CUDA_VERSION ", line) is not None: - cuda_version = line - break - - if cuda_version is None: - raise TypeError("CUDA_VERSION not found in cuda.h") - cuda_version = int(cuda_version.split()[2]) - return "%d%s%d" % ( - cuda_version // 1000, - delimeter, - (cuda_version % 1000) // 10, - ) - - -CUDA_HOME = os.environ.get("CUDA_HOME", False) -if not CUDA_HOME: - path_to_cuda_gdb = shutil.which("cuda-gdb") - if path_to_cuda_gdb is None: - raise OSError( - "Could not locate CUDA. " - "Please set the environment variable " - "CUDA_HOME to the path to the CUDA installation " - "and try again." - ) - CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb)) - -if not os.path.isdir(CUDA_HOME): - raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") - -cuda_include_dir = os.path.join(CUDA_HOME, "include") - install_requires = [ "cachetools", - "cuda-python>=11.5,<11.7.1", + "cuda-python>=11.7.1,<12.0", "fsspec>=0.6.0", "numba>=0.54", "numpy", @@ -61,10 +22,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - ( - f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0" - ",<12.0.0a0; platform_machine=='x86_64'", - ), + "cupy-cuda11x", ] extras_require = { @@ -73,7 +31,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): "pytest-benchmark", "pytest-xdist", "hypothesis", - "mimesis<4.1", + "mimesis>=4.1.0", "fastavro>=0.22.9", "python-snappy>=0.6.0", "pyorc", From 
4fcd35ea822bfab0cc0710ba7d75d6c12444b4ca Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 19:24:51 -0800 Subject: [PATCH 088/142] Temporarily revert cuda-python update. --- python/cudf/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 6951ac4fe5e..5f41dcafc2b 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -9,7 +9,8 @@ install_requires = [ "cachetools", - "cuda-python>=11.7.1,<12.0", + # "cuda-python>=11.7.1,<12.0", + "cuda-python>=11.5,<11.7.1", "fsspec>=0.6.0", "numba>=0.54", "numpy", From 8d5ab49b6f859f50df9ebfc3a32329334d24566d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Nov 2022 22:09:53 -0800 Subject: [PATCH 089/142] Try removing potentially unnecessary flags. --- python/cudf/CMakeLists.txt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 937fa850dea..c40829aa8c2 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -82,17 +82,9 @@ if(NOT cudf_FOUND) # the wheel by avoiding embedding GTest. set(CUDF_BUILD_TESTUTIL OFF) - # TODO: Unclear if we should need to require this for rmm and spdlog. rmm is already header-only - set(CPM_DOWNLOAD_rmm ON) - set(CPM_DOWNLOAD_spdlog ON) - # Statically link cudart if building wheels set(CUDA_STATIC_RUNTIME ON) - # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp - # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL - set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) - # Don't install the cuDF C++ targets into wheels set(_exclude_from_all EXCLUDE_FROM_ALL) endif() From 7ec36ffc69f864416dee500552e2fea0de6b6f90 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 9 Nov 2022 09:58:25 -0500 Subject: [PATCH 090/142] Put back the global targets flag for nvcomp. 
--- python/cudf/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index c40829aa8c2..810901d7d3b 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -85,6 +85,10 @@ if(NOT cudf_FOUND) # Statically link cudart if building wheels set(CUDA_STATIC_RUNTIME ON) + # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp + # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL + set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) + # Don't install the cuDF C++ targets into wheels set(_exclude_from_all EXCLUDE_FROM_ALL) endif() From 2ff487d969b965784bb3569f6d2a14762a37c036 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 9 Nov 2022 10:31:00 -0500 Subject: [PATCH 091/142] Revert "Temporarily revert cuda-python update." This reverts commit 4fcd35ea822bfab0cc0710ba7d75d6c12444b4ca. --- python/cudf/setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 5f41dcafc2b..6951ac4fe5e 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -9,8 +9,7 @@ install_requires = [ "cachetools", - # "cuda-python>=11.7.1,<12.0", - "cuda-python>=11.5,<11.7.1", + "cuda-python>=11.7.1,<12.0", "fsspec>=0.6.0", "numba>=0.54", "numpy", From f0a71deaadc9ef1619738e3a8eeba1655c60f8fe Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Nov 2022 12:48:32 -0800 Subject: [PATCH 092/142] Fix style. 
--- python/cudf/cmake/Modules/WheelHelpers.cmake | 25 +++++++++----------- python/cudf/setup.py | 1 - 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake index 882cc915641..28ea33240fa 100644 --- a/python/cudf/cmake/Modules/WheelHelpers.cmake +++ b/python/cudf/cmake/Modules/WheelHelpers.cmake @@ -1,20 +1,19 @@ -#============================================================================= +# ============================================================================= # Copyright (c) 2022, NVIDIA CORPORATION. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#============================================================================= +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= include_guard(GLOBAL) +# Making libraries available inside wheels by installing the associated targets. 
function(add_target_libs_to_wheel) list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel") @@ -51,9 +50,7 @@ function(add_target_libs_to_wheel) # Find the imported target's library so we can copy it into the wheel set(lib_loc) - foreach(prop IN ITEMS IMPORTED_LOCATION - IMPORTED_LOCATION_RELEASE - IMPORTED_LOCATION_DEBUG) + foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG) get_target_property(lib_loc ${target} ${prop}) if(lib_loc) message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}") diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 6951ac4fe5e..5a94e375d79 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -6,7 +6,6 @@ from setuptools import find_packages from skbuild import setup - install_requires = [ "cachetools", "cuda-python>=11.7.1,<12.0", From 8ec10b160e9c91ed37ef802d39617542bdaa029a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Nov 2022 12:55:22 -0800 Subject: [PATCH 093/142] Update to new workflow script. 
--- .github/workflows/wheels.yml | 39 +++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 9082e024a4d..b43254efe2f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -1,32 +1,48 @@ name: cuDF wheels on: - push: - branches: - - 'pull-request/[0-9]+' + workflow_call: + inputs: + versioneer-override: + type: string + default: '' + build-tag: + type: string + default: '' + branch: + required: true + type: string + date: + required: true + type: string + sha: + required: true + type: string + build-type: + type: string + default: nightly concurrency: - group: "${{ github.workflow }}-${{ github.ref }}" + group: "${{ github.repository }}-${{ github.workflow }}-${{ github.ref }}" cancel-in-progress: true jobs: cudf-wheels: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/wheel-ci-actions-2 + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main with: package-name: cudf package-dir: python/cudf - python-package-versioneer-override: "22.10.00" + python-package-versioneer-override: ${{ inputs.versioneer-override }} cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-environment: "CUDF_BUILD_WHEELS=1" - skbuild-configure-options: "--log-level=DEBUG -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DCMAKE_CUDA_ARCHITECTURES=ALL" + skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: "test" test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url 
https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" - test-unittest: "pytest -v ./python/cudf/cudf/tests" + test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: needs: cudf-wheels @@ -34,10 +50,9 @@ jobs: with: package-name: dask_cudf package-dir: python/dask_cudf - python-package-cuda-suffix: "-cu11" - python-package-versioneer-override: "22.10.00" + python-package-versioneer-override: ${{ inputs.versioneer-override }} test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-extras: "test" - test-unittest: "pytest -v ./python/dask_cudf/dask_cudf/tests" + test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" secrets: inherit From 568b65dfb969a7fee3dfed3053d5642c8e98190e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Nov 2022 13:11:23 -0800 Subject: [PATCH 094/142] Add back push workflow for now to keep testing. --- .github/workflows/wheels.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b43254efe2f..a50f5508c52 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -1,6 +1,9 @@ name: cuDF wheels on: + push: + branches: + - 'pull-request/[0-9]+' workflow_call: inputs: versioneer-override: From 7d69dbe163dc8761768de576f0c09496f808de98 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Nov 2022 13:17:24 -0800 Subject: [PATCH 095/142] Update all fields. 
--- .github/workflows/wheels.yml | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index a50f5508c52..03cc6838edc 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -33,9 +33,18 @@ jobs: cudf-wheels: uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main with: - package-name: cudf + repo: rapidsai/rmm + + build-type: ${{ inputs.build-type }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-dir: python/cudf + package-name: cudf + python-package-versioneer-override: ${{ inputs.versioneer-override }} + python-package-build-tag: ${{ inputs.build-tag }} cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" @@ -51,11 +60,20 @@ jobs: needs: cudf-wheels uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 with: - package-name: dask_cudf + repo: rapidsai/rmm + + build-type: ${{ inputs.build-type }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-dir: python/dask_cudf + package-name: dask_cudf + python-package-versioneer-override: ${{ inputs.versioneer-override }} + python-package-build-tag: ${{ inputs.build-tag }} + test-extras: test test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-extras: "test" test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" secrets: inherit From 502c3723cb1d01efec1993bc72886cf966ddb875 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Nov 2022 13:17:44 -0800 Subject: [PATCH 096/142] Temporarily comment out dask_cudf workflow since pure needs to be added back. 
--- .github/workflows/wheels.yml | 42 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 03cc6838edc..fc2688b565d 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -56,24 +56,24 @@ jobs: test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit - dask_cudf-wheel: - needs: cudf-wheels - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 - with: - repo: rapidsai/rmm - - build-type: ${{ inputs.build-type }} - branch: ${{ inputs.branch }} - sha: ${{ inputs.sha }} - date: ${{ inputs.date }} - - package-dir: python/dask_cudf - package-name: dask_cudf - - python-package-versioneer-override: ${{ inputs.versioneer-override }} - python-package-build-tag: ${{ inputs.build-tag }} - - test-extras: test - test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" - secrets: inherit + #dask_cudf-wheel: + # needs: cudf-wheels + # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 + # with: + # repo: rapidsai/rmm + # + # build-type: ${{ inputs.build-type }} + # branch: ${{ inputs.branch }} + # sha: ${{ inputs.sha }} + # date: ${{ inputs.date }} + # + # package-dir: python/dask_cudf + # package-name: dask_cudf + # + # python-package-versioneer-override: ${{ inputs.versioneer-override }} + # python-package-build-tag: ${{ inputs.build-tag }} + # + # test-extras: test + # test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + # test-unittest: "pytest -v -n 8 
./python/dask_cudf/dask_cudf/tests" + # secrets: inherit From 89afdfb0c851b793d23126f9cb78ac9275848cb1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Nov 2022 13:23:04 -0800 Subject: [PATCH 097/142] Fix typo in repo. --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index fc2688b565d..2f4aef32003 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -33,7 +33,7 @@ jobs: cudf-wheels: uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main with: - repo: rapidsai/rmm + repo: rapidsai/cudf build-type: ${{ inputs.build-type }} branch: ${{ inputs.branch }} From 0339b594f68074bf8189145540e17c013fd8bfc6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 10 Nov 2022 13:48:28 -0800 Subject: [PATCH 098/142] Set cupy-cuda11x in dask-cudf. --- python/dask_cudf/setup.py | 45 +-------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index a957a9cd849..bd534f20a6a 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -1,50 +1,10 @@ # Copyright (c) 2019-2022, NVIDIA CORPORATION. import os -import re -import shutil import versioneer from setuptools import find_packages, setup -CUDA_HOME = os.environ.get("CUDA_HOME", False) -if not CUDA_HOME: - path_to_cuda_gdb = shutil.which("cuda-gdb") - if path_to_cuda_gdb is None: - raise OSError( - "Could not locate CUDA. " - "Please set the environment variable " - "CUDA_HOME to the path to the CUDA installation " - "and try again." 
- ) - CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb)) - -if not os.path.isdir(CUDA_HOME): - raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}") - -cuda_include_dir = os.path.join(CUDA_HOME, "include") - - -def get_cuda_version_from_header(cuda_include_dir, delimeter=""): - - cuda_version = None - - with open(os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8") as f: - for line in f.readlines(): - if re.search(r"#define CUDA_VERSION ", line) is not None: - cuda_version = line - break - - if cuda_version is None: - raise TypeError("CUDA_VERSION not found in cuda.h") - cuda_version = int(cuda_version.split()[2]) - return "%d%s%d" % ( - cuda_version // 1000, - delimeter, - (cuda_version % 1000) // 10, - ) - - install_requires = [ "cudf", "dask==2022.9.2", @@ -53,10 +13,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): "numpy", "pandas>=1.0,<1.6.0dev0", f"cudf{os.getenv('RAPIDS_PY_WHEEL_CUDA_SUFFIX', default='')}", - ( - f"cupy-cuda{get_cuda_version_from_header(cuda_include_dir)}>=9.5.0," - "<11.0.0a0; platform_machine=='x86_64'" - ), + "cupy-cuda11x", ] extras_require = { From 163ddd3db824e1181693c43c5e7ce4f780c6d56d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 07:42:58 -0800 Subject: [PATCH 099/142] Fix build type for testing. 
--- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 2f4aef32003..30fdd6f8c0e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -35,7 +35,7 @@ jobs: with: repo: rapidsai/cudf - build-type: ${{ inputs.build-type }} + build-type: ${{ inputs.build-type || 'pull-request' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} From 60d6284d6008e590c7add7a99484f27fa1ac3d04 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 07:44:46 -0800 Subject: [PATCH 100/142] Enable dask-cudf tests. --- .github/workflows/wheels.yml | 42 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 30fdd6f8c0e..8039c95fd18 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -56,24 +56,24 @@ jobs: test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit - #dask_cudf-wheel: - # needs: cudf-wheels - # uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/wheel-ci-actions-2 - # with: - # repo: rapidsai/rmm - # - # build-type: ${{ inputs.build-type }} - # branch: ${{ inputs.branch }} - # sha: ${{ inputs.sha }} - # date: ${{ inputs.date }} - # - # package-dir: python/dask_cudf - # package-name: dask_cudf - # - # python-package-versioneer-override: ${{ inputs.versioneer-override }} - # python-package-build-tag: ${{ inputs.build-tag }} - # - # test-extras: test - # test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - # test-unittest: "pytest -v -n 8 
./python/dask_cudf/dask_cudf/tests" - # secrets: inherit + dask_cudf-wheel: + needs: cudf-wheels + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/pure-python-wheels + with: + repo: rapidsai/cudf + + build-type: ${{ inputs.build-type || 'pull-request' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + + package-dir: python/dask_cudf + package-name: dask_cudf + + python-package-versioneer-override: ${{ inputs.versioneer-override }} + python-package-build-tag: ${{ inputs.build-tag }} + + test-extras: test + test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" + secrets: inherit From 022a319496b56b519eab0f5b6b0cd8f2861b8248 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 07:44:56 -0800 Subject: [PATCH 101/142] Fix whitespace. --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8039c95fd18..04d988a2978 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -51,7 +51,7 @@ jobs: skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" - test-extras: "test" + test-extras: test test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" From d354b38ff35afbdc0a1729dac533e2f38fde9571 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 08:36:16 -0800 Subject: [PATCH 102/142] Test commit. 
--- .github/workflows/wheels.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 04d988a2978..616eeddd97c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -66,7 +66,6 @@ jobs: branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} - package-dir: python/dask_cudf package-name: dask_cudf From d93bb690e134bc37455d47705335da1ceeab6c74 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 08:36:23 -0800 Subject: [PATCH 103/142] Undo test. --- .github/workflows/wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 616eeddd97c..04d988a2978 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -66,6 +66,7 @@ jobs: branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} + package-dir: python/dask_cudf package-name: dask_cudf From 82d2f6a1fa79621459ecc3fa5cadc66b23a79711 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 10:59:59 -0800 Subject: [PATCH 104/142] Add missing license file. --- python/dask_cudf/LICENSE | 1 + 1 file changed, 1 insertion(+) create mode 120000 python/dask_cudf/LICENSE diff --git a/python/dask_cudf/LICENSE b/python/dask_cudf/LICENSE new file mode 120000 index 00000000000..30cff7403da --- /dev/null +++ b/python/dask_cudf/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file From 6d8ac44b29f8c9bf4f8ea521c523d5e3bdeb0e5a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 14:04:28 -0800 Subject: [PATCH 105/142] Remove setup_requires. 
--- python/cudf/setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 5a94e375d79..553a1eb59bb 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -65,9 +65,6 @@ package_data={ key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, - setup_requires=[ - f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - ], install_requires=install_requires, extras_require=extras_require, zip_safe=False, From 4915a1a972a5228d7a8ee83b5cf68f38531f5a4a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 14:15:45 -0800 Subject: [PATCH 106/142] Put back setup_requires. --- python/cudf/setup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 553a1eb59bb..2ff319c099d 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -65,6 +65,12 @@ package_data={ key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, + # TODO: We need this to be dynamic, so it doesn't work to put it into + # pyproject.toml, but setup_requires is deprecated so we need to find a + # better solution for this. + setup_requires=[ + f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + ], install_requires=install_requires, extras_require=extras_require, zip_safe=False, From aa4e8024b28ec977fde027bdd3b134a3c851b90b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 15:03:26 -0800 Subject: [PATCH 107/142] Remove redundant cudf requirement and also clean up dask test requirements. 
--- python/dask_cudf/setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index bd534f20a6a..d5ad1554002 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -6,7 +6,6 @@ from setuptools import find_packages, setup install_requires = [ - "cudf", "dask==2022.9.2", "distributed==2022.9.2", "fsspec>=0.6.0", @@ -22,8 +21,6 @@ "pandas>=1.0,<1.6.0dev0", "pytest", "numba>=0.54", - "dask>=2021.09.1", - "distributed>=2021.09.1", ] } From 88eb81a564efd9c6b00ae19f947d845e53979f6f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 15:56:40 -0800 Subject: [PATCH 108/142] Temporarily set cuda suffix until wheel is updated. --- .github/workflows/wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 04d988a2978..c1eeb76fed3 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -72,6 +72,7 @@ jobs: python-package-versioneer-override: ${{ inputs.versioneer-override }} python-package-build-tag: ${{ inputs.build-tag }} + python-package-cuda-suffix: "-cu11" test-extras: test test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" From dd6aacb9abd69af1644f5de60dacd575e8267e26 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Nov 2022 17:48:21 -0800 Subject: [PATCH 109/142] Add pytest-xdist for dask_cudf. 
--- python/dask_cudf/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index d5ad1554002..871aaaa3337 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -20,6 +20,7 @@ "numpy", "pandas>=1.0,<1.6.0dev0", "pytest", + "pytest-xdist", "numba>=0.54", ] } From 1ca15febedf62629575a30a98cfa3487c4658aaf Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 07:31:11 -0800 Subject: [PATCH 110/142] Add cupy requirements by arch to install_requires. --- .github/workflows/wheels.yml | 2 +- python/cudf/setup.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index c1eeb76fed3..61fdfacb6f0 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -53,7 +53,7 @@ jobs: test-extras: test test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 2ff319c099d..b6b7180b4b6 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -21,7 +21,12 @@ f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - "cupy-cuda11x", + # We may need to account for other architectures eventually. 
PEP 508 does + # not appear to support an 'in list' syntax + # `platform_machine in ('arch1', 'arch2', ...) + # so we'll need to use multiple `or` conditions to support that case. + "cupy-cuda11x;'x86' in platform_machine", + "cupy-cuda11x @ https://pip.cupy.dev/aarch64;'aarch' in platform_machine", ] extras_require = { From 79b8946124f5909839888c0ddf9213e50dd8e164 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 07:33:56 -0800 Subject: [PATCH 111/142] Try removing requests and tokenizers. --- .github/workflows/wheels.yml | 4 ++-- python/cudf/setup.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 61fdfacb6f0..42a39a61c8a 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -52,8 +52,8 @@ jobs: skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test - test-before-amd64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-before-arm64: "pip install requests tokenizers==0.10.2 && pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-before-amd64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-before-arm64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: diff --git a/python/cudf/setup.py b/python/cudf/setup.py index b6b7180b4b6..780b289e3ab 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -25,6 +25,9 @@ # not appear to support an 'in list' syntax # `platform_machine in ('arch1', 'arch2', ...) # so we'll need to use multiple `or` conditions to support that case. 
+ # This SO post has a pretty exhaustive list of possible platform names we + # may need to support: + # https://stackoverflow.com/questions/45125516/possible-values-for-uname-m "cupy-cuda11x;'x86' in platform_machine", "cupy-cuda11x @ https://pip.cupy.dev/aarch64;'aarch' in platform_machine", ] From 8d8743d92c68292a7ea9f9219cab0cabeb3dbdb2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 07:34:16 -0800 Subject: [PATCH 112/142] Try removing Python static libs. --- .github/workflows/wheels.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 42a39a61c8a..8a766ca0924 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -46,9 +46,6 @@ jobs: python-package-versioneer-override: ${{ inputs.versioneer-override }} python-package-build-tag: ${{ inputs.build-tag }} - cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test From 3ec9b2ea3e7dd4b4adcbbfeb980ec4c0f44e580e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 08:17:33 -0800 Subject: [PATCH 113/142] Revert "Add cupy requirements by arch to install_requires." This reverts commit 1ca15febedf62629575a30a98cfa3487c4658aaf. 
--- .github/workflows/wheels.yml | 2 +- python/cudf/setup.py | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8a766ca0924..91fe717528f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -50,7 +50,7 @@ jobs: test-extras: test test-before-amd64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-before-arm64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" + test-before-arm64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 780b289e3ab..2ff319c099d 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -21,15 +21,7 @@ f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - # We may need to account for other architectures eventually. PEP 508 does - # not appear to support an 'in list' syntax - # `platform_machine in ('arch1', 'arch2', ...) - # so we'll need to use multiple `or` conditions to support that case. 
- # This SO post has a pretty exhaustive list of possible platform names we - # may need to support: - # https://stackoverflow.com/questions/45125516/possible-values-for-uname-m - "cupy-cuda11x;'x86' in platform_machine", - "cupy-cuda11x @ https://pip.cupy.dev/aarch64;'aarch' in platform_machine", + "cupy-cuda11x", ] extras_require = { From b9b79f2cb5b45c66239ec81bbe5e070d96386af3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 08:19:50 -0800 Subject: [PATCH 114/142] Revert "Revert "Add cupy requirements by arch to install_requires."" This reverts commit 3ec9b2ea3e7dd4b4adcbbfeb980ec4c0f44e580e. --- .github/workflows/wheels.yml | 2 +- python/cudf/setup.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 91fe717528f..8a766ca0924 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -50,7 +50,7 @@ jobs: test-extras: test test-before-amd64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-before-arm64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple && pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-before-arm64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 2ff319c099d..780b289e3ab 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -21,7 +21,15 @@ f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - "cupy-cuda11x", + # We may need to account for other architectures eventually. 
PEP 508 does + # not appear to support an 'in list' syntax + # `platform_machine in ('arch1', 'arch2', ...) + # so we'll need to use multiple `or` conditions to support that case. + # This SO post has a pretty exhaustive list of possible platform names we + # may need to support: + # https://stackoverflow.com/questions/45125516/possible-values-for-uname-m + "cupy-cuda11x;'x86' in platform_machine", + "cupy-cuda11x @ https://pip.cupy.dev/aarch64;'aarch' in platform_machine", ] extras_require = { From 9d901ba794f40829f7e663b6c3698d94e211dfff Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 08:39:04 -0800 Subject: [PATCH 115/142] Try avoiding static libs explicitly. --- python/cudf/cudf/_lib/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index e1dea1376dc..0f48bf5b6b1 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -64,7 +64,7 @@ rapids_cython_create_modules( # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was # fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in # CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. -find_package(Python REQUIRED COMPONENTS Development NumPy) +find_package(Python REQUIRED COMPONENTS Development NumPy HINTS Python_USE_STATIC_LIBS FALSE) set(targets_using_numpy interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") From a490a0fbe1a4b1dff2562155ff36fedf1d69d9d0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 08:44:10 -0800 Subject: [PATCH 116/142] Fix cupy. 
--- python/cudf/setup.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 780b289e3ab..2ff319c099d 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -21,15 +21,7 @@ f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - # We may need to account for other architectures eventually. PEP 508 does - # not appear to support an 'in list' syntax - # `platform_machine in ('arch1', 'arch2', ...) - # so we'll need to use multiple `or` conditions to support that case. - # This SO post has a pretty exhaustive list of possible platform names we - # may need to support: - # https://stackoverflow.com/questions/45125516/possible-values-for-uname-m - "cupy-cuda11x;'x86' in platform_machine", - "cupy-cuda11x @ https://pip.cupy.dev/aarch64;'aarch' in platform_machine", + "cupy-cuda11x", ] extras_require = { From 7cf69956697eef4498ef31deb9d7e360fbbb310e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 09:06:33 -0800 Subject: [PATCH 117/142] Debug find. 
--- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8a766ca0924..8f4c1fcd217 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -46,7 +46,7 @@ jobs: python-package-versioneer-override: ${{ inputs.versioneer-override }} python-package-build-tag: ${{ inputs.build-tag }} - skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + skbuild-configure-options: "--debug-find -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test test-before-amd64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" From 5f1e8c64152f943078aa801333d9bd5fb9e35e19 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 10:00:01 -0800 Subject: [PATCH 118/142] Hint properly. --- python/cudf/cudf/_lib/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 0f48bf5b6b1..3723d4bf12c 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -64,7 +64,8 @@ rapids_cython_create_modules( # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was # fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in # CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. 
-find_package(Python REQUIRED COMPONENTS Development NumPy HINTS Python_USE_STATIC_LIBS FALSE) +set(Python_USE_STATIC_LIBS FALSE) +find_package(Python REQUIRED COMPONENTS Development NumPy) set(targets_using_numpy interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") From 00f36e03e273ca16584e394e35eaeac64afabe73 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 10:48:19 -0800 Subject: [PATCH 119/142] Put back libs for now. --- .github/workflows/wheels.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 8f4c1fcd217..42a39a61c8a 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -46,7 +46,10 @@ jobs: python-package-versioneer-override: ${{ inputs.versioneer-override }} python-package-build-tag: ${{ inputs.build-tag }} - skbuild-configure-options: "--debug-find -DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" + + skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test test-before-amd64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" From 8ccfe53e57416b7e785335719eec13679d01602e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 10:48:48 -0800 Subject: [PATCH 120/142] Remove dependency installation since that should be automated with a PIP_INDEX_URL set. 
--- .github/workflows/wheels.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 42a39a61c8a..1bda0717845 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -52,8 +52,6 @@ jobs: skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test - test-before-amd64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" - test-before-arm64: "pip install rmm-cu11 ptxcompiler-cu11 cubinlinker-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: @@ -75,6 +73,5 @@ jobs: python-package-cuda-suffix: "-cu11" test-extras: test - test-before: "pip install rmm-cu11 cudf-cu11 --index-url https://pypi.k8s.rapids.ai/simple" test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" secrets: inherit From ce91f54528793979d7bd3e83cec806c122d82de2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 10:55:43 -0800 Subject: [PATCH 121/142] Remove hint. --- python/cudf/cudf/_lib/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 3723d4bf12c..e1dea1376dc 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -64,7 +64,6 @@ rapids_cython_create_modules( # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was # fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in # CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. 
-set(Python_USE_STATIC_LIBS FALSE) find_package(Python REQUIRED COMPONENTS Development NumPy) set(targets_using_numpy interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) From 6e14318ef46f9679c7519739cec678164bcfe69f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 11:27:09 -0800 Subject: [PATCH 122/142] Try switching to FindNumpy.cmake. --- python/cudf/cudf/_lib/CMakeLists.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index e1dea1376dc..e260fc77d63 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -64,10 +64,20 @@ rapids_cython_create_modules( # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was # fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in # CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24. -find_package(Python REQUIRED COMPONENTS Development NumPy) +# find_package(Python REQUIRED COMPONENTS Development NumPy) + +# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake +# inside the manylinux images used to build wheels because manylinux images do not contain +# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should +# use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try +# switching back, but it may not work if that implicitly still requires Python libraries. In that +# case we'll need to follow up with the CMake team to remove that dependency. 
+find_package(NumPy REQUIRED) set(targets_using_numpy interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) - target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") + target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}") + # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24. + # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") endforeach() add_subdirectory(io) From 2ce829203a4746487991af9954fcd49ef773475b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 11:40:15 -0800 Subject: [PATCH 123/142] Remove libraries again. --- .github/workflows/wheels.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1bda0717845..9144179439e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -46,9 +46,6 @@ jobs: python-package-versioneer-override: ${{ inputs.versioneer-override }} python-package-build-tag: ${{ inputs.build-tag }} - cibw-before-all-amd64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - cibw-before-all-arm64: "tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C /opt/_internal" - skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test From 572aea701fb665f086423949acd2a200b6a71dc9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 11:50:46 -0800 Subject: [PATCH 124/142] Extend comment about FindPython.cmake. --- python/cudf/cudf/_lib/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index e260fc77d63..c6d4becdbec 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -71,7 +71,11 @@ rapids_cython_create_modules( # libpython.so and therefore Development cannot be found. 
Until we upgrade to CMake 3.24, we should # use FindNumpy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try # switching back, but it may not work if that implicitly still requires Python libraries. In that -# case we'll need to follow up with the CMake team to remove that dependency. +# case we'll need to follow up with the CMake team to remove that dependency. The stopgap solution +# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static +# libs to be found, but that should be a last resort since it implies a dependency that isn't really +# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C +# /opt/_internal" find_package(NumPy REQUIRED) set(targets_using_numpy interop avro csv orc json parquet) foreach(target IN LISTS targets_using_numpy) From 21b0c297b0b629cdc03147babb696d924678e023 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 11:50:59 -0800 Subject: [PATCH 125/142] Remove suffix specification. --- .github/workflows/wheels.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 9144179439e..20e71ac3f37 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -67,7 +67,6 @@ jobs: python-package-versioneer-override: ${{ inputs.versioneer-override }} python-package-build-tag: ${{ inputs.build-tag }} - python-package-cuda-suffix: "-cu11" test-extras: test test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" From b135b7f88da76b0b7160ffb970c81af43b2a5f6d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 11:59:21 -0800 Subject: [PATCH 126/142] Add cupy install to arm. 
--- .github/workflows/wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 20e71ac3f37..736708db30c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -49,6 +49,8 @@ jobs: skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test + # Have to manually specify the cupy install location on arm. + test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: From 51b653e4f0c20d1328b20e1d03cd825a7c1290a5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 12:40:10 -0800 Subject: [PATCH 127/142] Switch to the modern solution for dynamic dependencies. --- python/cudf/_custom_build/backend.py | 37 ++++++++++++++++++++++++++++ python/cudf/pyproject.toml | 5 +++- python/cudf/setup.py | 6 ----- 3 files changed, 41 insertions(+), 7 deletions(-) create mode 100644 python/cudf/_custom_build/backend.py diff --git a/python/cudf/_custom_build/backend.py b/python/cudf/_custom_build/backend.py new file mode 100644 index 00000000000..c2197a71a1e --- /dev/null +++ b/python/cudf/_custom_build/backend.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +"""Custom build backend for cudf to get versioned requirements. 
+ +Based on https://setuptools.pypa.io/en/latest/build_meta.html +""" +import os +from functools import wraps + +from setuptools import build_meta as _orig + +# Alias the required bits +build_wheel = _orig.build_wheel +build_sdist = _orig.build_sdist + + +def replace_requirements(func): + @wraps(func) + def wrapper(config_settings=None): + orig_list = getattr(_orig, func.__name__)(config_settings) + append_list = [ + f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}" + ] + return orig_list + append_list + + return wrapper + + +get_requires_for_build_wheel = replace_requirements( + _orig.get_requires_for_build_wheel +) +get_requires_for_build_sdist = replace_requirements( + _orig.get_requires_for_build_sdist +) +get_requires_for_build_editable = replace_requirements( + _orig.get_requires_for_build_editable +) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 912c67adfc7..1fc287e5aba 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -11,5 +11,8 @@ requires = [ "ninja", "numpy<1.23", "pyarrow==9.0.0", - "protoc-wheel" + "protoc-wheel", + "versioneer", ] +build-backend = "backend" +backend-path = ["_custom_build"] diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 2ff319c099d..553a1eb59bb 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -65,12 +65,6 @@ package_data={ key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"]) }, - # TODO: We need this to be dynamic, so it doesn't work to put it into - # pyproject.toml, but setup_requires is deprecated so we need to find a - # better solution for this. 
- setup_requires=[ - f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - ], install_requires=install_requires, extras_require=extras_require, zip_safe=False, From 938c3d72d626c86d059f10da3570ed0bb91c9e9c Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 15 Nov 2022 16:53:21 -0500 Subject: [PATCH 128/142] Hardcode repo name in concurrency group --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 736708db30c..e70e91acd36 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -26,7 +26,7 @@ on: default: nightly concurrency: - group: "${{ github.repository }}-${{ github.workflow }}-${{ github.ref }}" + group: "cudf-${{ github.workflow }}-${{ github.ref }}" cancel-in-progress: true jobs: From 98f59a404fb9c700b4d29448c8f687421089281e Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 15 Nov 2022 17:59:49 -0500 Subject: [PATCH 129/142] Use feat/pure-python-wheels branch of cibw --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e70e91acd36..174aab9fd0c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -31,7 +31,7 @@ concurrency: jobs: cudf-wheels: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/pure-python-wheels with: repo: rapidsai/cudf From 789a5357fbd8a0061b4a3b2a8d4d9875df1c3eff Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 15 Nov 2022 18:48:57 -0500 Subject: [PATCH 130/142] Install tokenizers==0.10.2 for aarch64 to avoid Rust compile --- .github/workflows/wheels.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 174aab9fd0c..c2d4fe399a3 100644 
--- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -49,8 +49,13 @@ jobs: skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" test-extras: test + # Have to manually specify the cupy install location on arm. - test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64" + # Have to also manually install tokenizers==0.10.2, which is the last tokenizers + # to have a binary aarch64 wheel available on PyPI + # Otherwise, the tokenizers sdist is used, which needs a Rust compiler, and blah blah blah + test-before-arm64: "pip install tokenizers==0.10.2 && cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit dask_cudf-wheel: From 4b8d62d8500bda06ae173dbc564287a8a1cccb3c Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Tue, 15 Nov 2022 19:25:19 -0500 Subject: [PATCH 131/142] Fix pip install command in test-before --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index c2d4fe399a3..6d723341166 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -54,7 +54,7 @@ jobs: # Have to also manually install tokenizers==0.10.2, which is the last tokenizers # to have a binary aarch64 wheel available on PyPI # Otherwise, the tokenizers sdist is used, which needs a Rust compiler, and blah blah blah - test-before-arm64: "pip install tokenizers==0.10.2 && cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" secrets: inherit From 5e1f0943a4c6e0acd32cfd9397f44eb1d2ebd9bb Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 21:43:54 -0800 Subject: [PATCH 132/142] Fix style. 
--- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6d723341166..60560e30888 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -51,9 +51,9 @@ jobs: test-extras: test # Have to manually specify the cupy install location on arm. - # Have to also manually install tokenizers==0.10.2, which is the last tokenizers + # Have to also manually install tokenizers==0.10.2, which is the last tokenizers # to have a binary aarch64 wheel available on PyPI - # Otherwise, the tokenizers sdist is used, which needs a Rust compiler, and blah blah blah + # Otherwise, the tokenizers sdist is used, which needs a Rust compiler test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" From f2e561742485054d090cd65c7a57de877a34758d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 22:00:30 -0800 Subject: [PATCH 133/142] Try removing numpy pin. --- python/cudf/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 1fc287e5aba..92b86649564 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -9,7 +9,7 @@ requires = [ "scikit-build>=0.13.1", "cmake>=3.23.1", "ninja", - "numpy<1.23", + "numpy", "pyarrow==9.0.0", "protoc-wheel", "versioneer", From 5842b731a60972bfcaefcd1fb247836153314f0c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 15 Nov 2022 22:31:00 -0800 Subject: [PATCH 134/142] Revert numba version change. 
--- python/cudf/setup.py | 2 +- python/dask_cudf/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 553a1eb59bb..84c957f6c45 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -10,7 +10,7 @@ "cachetools", "cuda-python>=11.7.1,<12.0", "fsspec>=0.6.0", - "numba>=0.54", + "numba>=0.56.2", "numpy", "nvtx>=0.2.1", "packaging", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 871aaaa3337..21b30512be1 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -21,7 +21,7 @@ "pandas>=1.0,<1.6.0dev0", "pytest", "pytest-xdist", - "numba>=0.54", + "numba>=0.56.2", ] } From fb2dc7acc2c1814a78eb91f76a94f7cee365fcd8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 16 Nov 2022 09:40:47 -0800 Subject: [PATCH 135/142] Address PR reviews. --- cpp/CMakeLists.txt | 1 + python/cudf/CMakeLists.txt | 2 +- python/cudf/setup.py | 10 ++++++---- python/dask_cudf/setup.py | 6 ++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d872a397dd9..79951cdabbe 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -49,6 +49,7 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON) +mark_as_advanced(CUDF_BUILD_TESTUTIL) option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON) option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF) diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 810901d7d3b..9e5f845496e 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -31,7 +31,7 @@ project( 
option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files" OFF ) -option(CUDF_BUILD_WHEELS "Whether we're building a wheel for pypi" OFF) +option(CUDF_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF) mark_as_advanced(USE_LIBARROW_FROM_PYARROW) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 84c957f6c45..58fb3e3d816 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -6,6 +6,8 @@ from setuptools import find_packages from skbuild import setup +cuda_suffix = os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default="") + install_requires = [ "cachetools", "cuda-python>=11.7.1,<12.0", @@ -18,9 +20,9 @@ "protobuf>=3.20.1,<3.21.0a0", "typing_extensions", "pyarrow==9.0.0", - f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - f"ptxcompiler{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", - f"cubinlinker{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}", + f"rmm{cuda_suffix}", + f"ptxcompiler{cuda_suffix}", + f"cubinlinker{cuda_suffix}", "cupy-cuda11x", ] @@ -42,7 +44,7 @@ setup( - name="cudf" + os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default=""), + name=f"cudf{cuda_suffix}", version=os.getenv( "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version() ), diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 21b30512be1..43275cc3bdb 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -5,13 +5,15 @@ import versioneer from setuptools import find_packages, setup +cuda_suffix = os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default="") + install_requires = [ "dask==2022.9.2", "distributed==2022.9.2", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.6.0dev0", - f"cudf{os.getenv('RAPIDS_PY_WHEEL_CUDA_SUFFIX', default='')}", + f"cudf{cuda_suffix}", "cupy-cuda11x", ] @@ -26,7 +28,7 @@ } setup( - name="dask-cudf" + os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", 
default=""), + name=f"dask-cudf{cuda_suffix}", version=os.getenv( "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version() ), From f638b3c0f40cfebcb3c7b87f6562353e29b01f05 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Wed, 16 Nov 2022 17:06:55 -0500 Subject: [PATCH 136/142] Use RAPIDS_PY_WHEEL_CUDA_SUFFIX instead of PYTHON_PACKAGE_CUDA_SUFFIX --- python/cudf/_custom_build/backend.py | 2 +- python/cudf/setup.py | 2 +- python/dask_cudf/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/_custom_build/backend.py b/python/cudf/_custom_build/backend.py index c2197a71a1e..37b7edf2432 100644 --- a/python/cudf/_custom_build/backend.py +++ b/python/cudf/_custom_build/backend.py @@ -19,7 +19,7 @@ def replace_requirements(func): def wrapper(config_settings=None): orig_list = getattr(_orig, func.__name__)(config_settings) append_list = [ - f"rmm{os.getenv('PYTHON_PACKAGE_CUDA_SUFFIX', default='')}" + f"rmm{os.getenv('RAPIDS_PY_WHEEL_CUDA_SUFFIX', default='')}" ] return orig_list + append_list diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 58fb3e3d816..dea7b5f4d46 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages from skbuild import setup -cuda_suffix = os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default="") +cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="") install_requires = [ "cachetools", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 43275cc3bdb..08db92e422c 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -5,7 +5,7 @@ import versioneer from setuptools import find_packages, setup -cuda_suffix = os.getenv("PYTHON_PACKAGE_CUDA_SUFFIX", default="") +cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="") install_requires = [ "dask==2022.9.2", From dad07605db192075408056203ce22e2f209a1675 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 17 Nov 2022 10:11:57 
-0500 Subject: [PATCH 137/142] Use main branch of shared-action-workflows --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 60560e30888..637d7bcaaab 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -31,7 +31,7 @@ concurrency: jobs: cudf-wheels: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@feat/pure-python-wheels + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main with: repo: rapidsai/cudf @@ -60,7 +60,7 @@ jobs: secrets: inherit dask_cudf-wheel: needs: cudf-wheels - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@feat/pure-python-wheels + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@main with: repo: rapidsai/cudf From cb144a5d68c2421f0210650607cef7fc54e02c8c Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 17 Nov 2022 12:19:09 -0500 Subject: [PATCH 138/142] Patch __version__ to match wheel version --- python/cudf/setup.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index dea7b5f4d46..5db95e42a17 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -42,6 +42,19 @@ ] } +if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ: + # borrow a similar hack from dask-cuda: https://github.com/rapidsai/dask-cuda/blob/b3ed9029a1ad02a61eb7fbd899a5a6826bb5cfac/setup.py#L12-L31 + orig_get_versions = versioneer.get_versions + + version_override = os.environ.get("RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", "") + + def get_versions(): + data = orig_get_versions() + if version_override != "": + data["version"] = version_override + return data + + versioneer.get_versions = get_versions setup( name=f"cudf{cuda_suffix}", From 6088862e5cd536a9c6ce1d047c652c095e5ba0c9 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 17 Nov 2022 
12:32:43 -0500 Subject: [PATCH 139/142] Apply __version__ fix to dask-cudf also --- python/dask_cudf/setup.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 08db92e422c..bbe90596910 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -27,6 +27,20 @@ ] } +if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ: + # borrow a similar hack from dask-cuda: https://github.com/rapidsai/dask-cuda/blob/b3ed9029a1ad02a61eb7fbd899a5a6826bb5cfac/setup.py#L12-L31 + orig_get_versions = versioneer.get_versions + + version_override = os.environ.get("RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", "") + + def get_versions(): + data = orig_get_versions() + if version_override != "": + data["version"] = version_override + return data + + versioneer.get_versions = get_versions + setup( name=f"dask-cudf{cuda_suffix}", version=os.getenv( From e7a291fcc86007f0f79eebae63e6074186ac1fe7 Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 17 Nov 2022 14:11:48 -0500 Subject: [PATCH 140/142] Use patched versioneer.get_version --- python/cudf/setup.py | 4 +--- python/dask_cudf/setup.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 5db95e42a17..fe038d232a1 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -58,9 +58,7 @@ def get_versions(): setup( name=f"cudf{cuda_suffix}", - version=os.getenv( - "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version() - ), + version=versioneer.get_version(), description="cuDF - GPU Dataframe", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index bbe90596910..5e3d2e0e10f 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -43,9 +43,7 @@ def get_versions(): setup( name=f"dask-cudf{cuda_suffix}", - version=os.getenv( - 
"RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", default=versioneer.get_version() - ), + version=versioneer.get_version(), description="Utilities for Dask and cuDF interactions", url="https://github.com/rapidsai/cudf", author="NVIDIA Corporation", From e6d91ec2c53f1c3bce163f7b14180e8e308736ee Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 17 Nov 2022 14:45:01 -0500 Subject: [PATCH 141/142] Complain if RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE is set but empty --- python/cudf/setup.py | 6 ++---- python/dask_cudf/setup.py | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index fe038d232a1..2d5defc2849 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -43,15 +43,13 @@ } if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ: - # borrow a similar hack from dask-cuda: https://github.com/rapidsai/dask-cuda/blob/b3ed9029a1ad02a61eb7fbd899a5a6826bb5cfac/setup.py#L12-L31 orig_get_versions = versioneer.get_versions - version_override = os.environ.get("RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", "") + version_override = os.environ["RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE"] def get_versions(): data = orig_get_versions() - if version_override != "": - data["version"] = version_override + data["version"] = version_override return data versioneer.get_versions = get_versions diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 5e3d2e0e10f..d9d4da9c4ab 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -28,15 +28,13 @@ } if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ: - # borrow a similar hack from dask-cuda: https://github.com/rapidsai/dask-cuda/blob/b3ed9029a1ad02a61eb7fbd899a5a6826bb5cfac/setup.py#L12-L31 orig_get_versions = versioneer.get_versions - version_override = os.environ.get("RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE", "") + version_override = os.environ["RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE"] def get_versions(): data = orig_get_versions() 
- if version_override != "": - data["version"] = version_override + data["version"] = version_override return data versioneer.get_versions = get_versions From 882746afa1533295d597b4f10bf01566ff59c93e Mon Sep 17 00:00:00 2001 From: Sevag Hanssian Date: Thu, 17 Nov 2022 16:26:13 -0500 Subject: [PATCH 142/142] Remove pull-request push trigger --- .github/workflows/wheels.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 637d7bcaaab..7f1c708c9a7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -1,9 +1,6 @@ name: cuDF wheels on: - push: - branches: - - 'pull-request/[0-9]+' workflow_call: inputs: versioneer-override: @@ -35,7 +32,7 @@ jobs: with: repo: rapidsai/cudf - build-type: ${{ inputs.build-type || 'pull-request' }} + build-type: ${{ inputs.build-type }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} @@ -64,7 +61,7 @@ jobs: with: repo: rapidsai/cudf - build-type: ${{ inputs.build-type || 'pull-request' }} + build-type: ${{ inputs.build-type }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }}