diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py index ef88183754d..2440e61cb15 100644 --- a/ci/checks/copyright.py +++ b/ci/checks/copyright.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,7 +18,17 @@ import argparse import io import os -import git_helpers +import sys + +SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.expanduser(__file__))) + +# Add the scripts dir for gitutils +sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, + "../../cpp/scripts"))) + +# Now import gitutils. Ignore flake8 error here since there is no other way to +# set up imports +import gitutils # noqa: E402 FilesToCheck = [ re.compile(r"[.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx)$"), @@ -28,22 +38,25 @@ re.compile(r"[.]flake8[.]cython$"), re.compile(r"meta[.]yaml$") ] +ExemptFiles = [] # this will break starting at year 10000, which is probably OK :) -CheckSimple = re.compile(r"Copyright \(c\) (\d{4}), NVIDIA CORPORATION") +CheckSimple = re.compile( + r"Copyright *(?:\(c\))? *(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)") CheckDouble = re.compile( - r"Copyright \(c\) (\d{4})-(\d{4}), NVIDIA CORPORATION") + r"Copyright *(?:\(c\))? *(\d{4})-(\d{4}),? *NVIDIA C(?:ORPORATION|orporation)" # noqa: E501 +) def checkThisFile(f): # This check covers things like symlinks which point to files that DNE - if not(os.path.exists(f)): + if not (os.path.exists(f)): return False - if git_helpers and git_helpers.isFileEmpty(f): - return False - # Special case for versioneer.py - it uses a separate copyright. 
- if os.path.basename(f) == "versioneer.py": + if gitutils and gitutils.isFileEmpty(f): return False + for exempt in ExemptFiles: + if exempt.search(f): + return False for checker in FilesToCheck: if checker.search(f): return True @@ -87,12 +100,22 @@ def checkCopyright(f, update_current_year): continue crFound = True if start > end: - e = [f, lineNum, "First year after second year in the copyright " - "header (manual fix required)", None] + e = [ + f, + lineNum, + "First year after second year in the copyright " + "header (manual fix required)", + None + ] errs.append(e) if thisYear < start or thisYear > end: - e = [f, lineNum, "Current year not included in the " - "copyright header", None] + e = [ + f, + lineNum, + "Current year not included in the " + "copyright header", + None + ] if thisYear < start: e[-1] = replaceCurrentYear(line, thisYear, end) if thisYear > end: @@ -103,8 +126,13 @@ def checkCopyright(f, update_current_year): fp.close() # copyright header itself not found if not crFound: - e = [f, 0, "Copyright header missing or formatted incorrectly " - "(manual fix required)", None] + e = [ + f, + 0, + "Copyright header missing or formatted incorrectly " + "(manual fix required)", + None + ] errs.append(e) # even if the year matches a copyright header, make the check pass if yearMatched: @@ -125,7 +153,6 @@ def checkCopyright(f, update_current_year): return errs - def getAllFilesUnderDir(root, pathFilter=None): retList = [] for (dirpath, dirnames, filenames) in os.walk(root): @@ -143,25 +170,47 @@ def checkCopyright_main(): it compares between branches "$PR_TARGET_BRANCH" and "current-pr-branch" """ retVal = 0 + global ExemptFiles argparser = argparse.ArgumentParser( - description="Checks for a consistent copyright header") - argparser.add_argument("--update-current-year", dest='update_current_year', - action="store_true", required=False, help="If set, " - "update the current year if a header is already " - "present and well formatted.") - 
argparser.add_argument("--git-modified-only", dest='git_modified_only', - action="store_true", required=False, help="If set, " - "only files seen as modified by git will be " - "processed.") + "Checks for a consistent copyright header in git's modified files") + argparser.add_argument("--update-current-year", + dest='update_current_year', + action="store_true", + required=False, + help="If set, " + "update the current year if a header " + "is already present and well formatted.") + argparser.add_argument("--git-modified-only", + dest='git_modified_only', + action="store_true", + required=False, + help="If set, " + "only files seen as modified by git will be " + "processed.") + argparser.add_argument("--exclude", + dest='exclude', + action="append", + required=False, + default=["python/cuml/_thirdparty/"], + help=("Exclude the paths specified (regexp). " + "Can be specified multiple times.")) (args, dirs) = argparser.parse_known_args() + try: + ExemptFiles = ExemptFiles + [pathName for pathName in args.exclude] + ExemptFiles = [re.compile(file) for file in ExemptFiles] + except re.error as reException: + print("Regular expression error:") + print(reException) + return 1 + if args.git_modified_only: - files = git_helpers.modifiedFiles(pathFilter=checkThisFile) + files = gitutils.modifiedFiles(pathFilter=checkThisFile) else: files = [] for d in [os.path.abspath(d) for d in dirs]: - if not(os.path.isdir(d)): + if not (os.path.isdir(d)): raise ValueError(f"{d} is not a directory.") files += getAllFilesUnderDir(d, pathFilter=checkThisFile) @@ -178,8 +227,9 @@ def checkCopyright_main(): path_parts = os.path.abspath(__file__).split(os.sep) file_from_repo = os.sep.join(path_parts[path_parts.index("ci"):]) if n_fixable > 0: - print("You can run {} --update-current-year to fix {} of these " - "errors.\n".format(file_from_repo, n_fixable)) + print(("You can run `python {} --git-modified-only " + "--update-current-year` to fix {} of these " + "errors.\n").format(file_from_repo, 
n_fixable)) retVal = 1 else: print("Copyright check passed") diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 2bcc41daccf..c95e53ff8b3 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -49,6 +49,7 @@ requirements: run: - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} - libraft-headers {{ minor_version }} + - librmm {{ minor_version }} - nccl>=2.9.9 - ucx-proc=*=gpu - libcugraphops {{ minor_version }}.* diff --git a/conda/recipes/libcugraph_etl/meta.yaml b/conda/recipes/libcugraph_etl/meta.yaml index ea4e0a4cdae..d039f30fb4a 100644 --- a/conda/recipes/libcugraph_etl/meta.yaml +++ b/conda/recipes/libcugraph_etl/meta.yaml @@ -45,6 +45,7 @@ requirements: - libcudf {{ minor_version }}.* - libcugraph {{ minor_version }}.* - libraft-headers {{ minor_version }} + - librmm {{ minor_version }} about: home: http://rapids.ai/ diff --git a/cpp/cmake/thirdparty/get_libcugraphops.cmake b/cpp/cmake/thirdparty/get_libcugraphops.cmake index f2abfb7f4fd..f490652fc8e 100644 --- a/cpp/cmake/thirdparty/get_libcugraphops.cmake +++ b/cpp/cmake/thirdparty/get_libcugraphops.cmake @@ -21,7 +21,7 @@ function(find_and_configure_cugraphops) endif() rapids_find_generate_module(cugraphops - HEADER_NAMES graph/sampling.h + HEADER_NAMES graph/sampling.hpp LIBRARY_NAMES cugraph-ops++ INCLUDE_SUFFIXES cugraph-ops BUILD_EXPORT_SET cugraph-exports diff --git a/cpp/include/cugraph/visitors/enum_mapping.hpp b/cpp/include/cugraph/visitors/enum_mapping.hpp old mode 100755 new mode 100644 index ab72f87bcab..99b8d9eeb3b --- a/cpp/include/cugraph/visitors/enum_mapping.hpp +++ b/cpp/include/cugraph/visitors/enum_mapping.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/cugraph/visitors/graph_enum.hpp b/cpp/include/cugraph/visitors/graph_enum.hpp old mode 100755 new mode 100644 index eca53035313..1e3a6901af7 --- a/cpp/include/cugraph/visitors/graph_enum.hpp +++ b/cpp/include/cugraph/visitors/graph_enum.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cugraph/visitors/graph_envelope.hpp b/cpp/include/cugraph/visitors/graph_envelope.hpp old mode 100755 new mode 100644 index 39f647ff66c..293d52b4045 --- a/cpp/include/cugraph/visitors/graph_envelope.hpp +++ b/cpp/include/cugraph/visitors/graph_envelope.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/scripts/gitutils.py b/cpp/scripts/gitutils.py new file mode 100644 index 00000000000..4e30f8fcb03 --- /dev/null +++ b/cpp/scripts/gitutils.py @@ -0,0 +1,286 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import subprocess +import os +import re + + +def isFileEmpty(f): + return os.stat(f).st_size == 0 + + +def __git(*opts): + """Runs a git command and returns its output""" + cmd = "git " + " ".join(list(opts)) + ret = subprocess.check_output(cmd, shell=True) + return ret.decode("UTF-8").rstrip("\n") + + +def __gitdiff(*opts): + """Runs a git diff command with no pager set""" + return __git("--no-pager", "diff", *opts) + + +def branch(): + """Returns the name of the current branch""" + name = __git("rev-parse", "--abbrev-ref", "HEAD") + name = name.rstrip() + return name + + +def repo_version(): + """ + Determines the version of the repo by using `git describe` + + Returns + ------- + str + The full version of the repo in the format 'v#.#.#{a|b|rc}' + """ + return __git("describe", "--tags", "--abbrev=0") + + +def repo_version_major_minor(): + """ + Determines the version of the repo using `git describe` and returns only + the major and minor portion + + Returns + ------- + str + The partial version of the repo in the format '{major}.{minor}' + """ + + full_repo_version = repo_version() + + match = re.match(r"^v?(?P<major>[0-9]+)(?:\.(?P<minor>[0-9]+))?", + full_repo_version) + + if (match is None): + print(" [DEBUG] Could not determine repo major minor version. " + f"Full repo version: {full_repo_version}.") + return None + + out_version = match.group("major") + + if (match.group("minor")): + out_version += "." + match.group("minor") + + return out_version + + +def determine_merge_commit(current_branch="HEAD"): + """ + When running outside of CI, this will estimate the target merge commit hash + of `current_branch` by finding a common ancestor with the remote branch + 'branch-{major}.{minor}' where {major} and {minor} are determined from the + repo version. 
+ + Parameters + ---------- + current_branch : str, optional + Which branch to consider as the current branch, by default "HEAD" + + Returns + ------- + str + The common commit hash ID + """ + + try: + # Try to determine the target branch from the most recent tag + head_branch = __git("describe", + "--all", + "--tags", + "--match='branch-*'", + "--abbrev=0") + except subprocess.CalledProcessError: + print(" [DEBUG] Could not determine target branch from most recent " + "tag. Falling back to 'branch-{major}.{minor}.") + head_branch = None + + if (head_branch is not None): + # Convert from head to branch name + head_branch = __git("name-rev", "--name-only", head_branch) + else: + # Try and guess the target branch as "branch-{major}.{minor}" + version = repo_version_major_minor() + + if (version is None): + return None + + head_branch = "branch-{}".format(version) + + try: + # Now get the remote tracking branch + remote_branch = __git("rev-parse", + "--abbrev-ref", + "--symbolic-full-name", + head_branch + "@{upstream}") + except subprocess.CalledProcessError: + print(" [DEBUG] Could not remote tracking reference for " + f"branch {head_branch}.") + remote_branch = None + + if (remote_branch is None): + return None + + print(f" [DEBUG] Determined TARGET_BRANCH as: '{remote_branch}'. " + "Finding common ancestor.") + + common_commit = __git("merge-base", remote_branch, current_branch) + + return common_commit + + +def uncommittedFiles(): + """ + Returns a list of changed files that are not yet committed. Only entries + whose short status is exactly M or A are kept; untracked files are skipped. 
+ """ + files = __git("status", "-u", "-s") + ret = [] + for f in files.splitlines(): + f = f.strip(" ") + f = re.sub("\s+", " ", f) # noqa: W605 + tmp = f.split(" ", 1) + # only consider staged files or uncommitted files + # in other words, ignore untracked files + if tmp[0] == "M" or tmp[0] == "A": + ret.append(tmp[1]) + return ret + + +def changedFilesBetween(baseName, branchName, commitHash): + """ + Returns a list of files changed between branches baseName and latest commit + of branchName. + """ + current = branch() + # checkout "base" branch + __git("checkout", "--force", baseName) + # checkout branch for comparing + __git("checkout", "--force", branchName) + # checkout latest commit from branch + __git("checkout", "-fq", commitHash) + + files = __gitdiff("--name-only", + "--ignore-submodules", + f"{baseName}..{branchName}") + + # restore the original branch + __git("checkout", "--force", current) + return files.splitlines() + + +def changesInFileBetween(file, b1, b2, filter=None): + """Filters the changed lines to a file between the branches b1 and b2""" + current = branch() + __git("checkout", "--quiet", b1) + __git("checkout", "--quiet", b2) + diffs = __gitdiff("--ignore-submodules", + "-w", + "--minimal", + "-U0", + "%s...%s" % (b1, b2), + "--", + file) + __git("checkout", "--quiet", current) + lines = [] + for line in diffs.splitlines(): + if filter is None or filter(line): + lines.append(line) + return lines + + +def modifiedFiles(pathFilter=None): + """ + If inside a CI-env (ie. TARGET_BRANCH and COMMIT_HASH are defined, and + current branch is "current-pr-branch"), then lists out all files modified + between these 2 branches. Locally, TARGET_BRANCH will try to be determined + from the current repo version and finding a corresponding branch named + 'branch-{major}.{minor}'. If this fails, this function will list out all + the uncommitted files in the current branch. 
+ + Such a utility function is helpful while putting checker scripts as part of + cmake, as well as CI process. This way, during development, only the files + touched (but not yet committed) by devs can be checked. But, during the CI + process ALL files modified by the dev, as submitted in the PR, will be + checked. This happens, all the while using the same script. + """ + targetBranch = os.environ.get("TARGET_BRANCH") + commitHash = os.environ.get("COMMIT_HASH") + currentBranch = branch() + print( + f" [DEBUG] TARGET_BRANCH={targetBranch}, COMMIT_HASH={commitHash}, " + f"currentBranch={currentBranch}") + + if targetBranch and commitHash and (currentBranch == "current-pr-branch"): + print(" [DEBUG] Assuming a CI environment.") + allFiles = changedFilesBetween(targetBranch, currentBranch, commitHash) + else: + print(" [DEBUG] Did not detect CI environment. " + "Determining TARGET_BRANCH locally.") + + common_commit = determine_merge_commit(currentBranch) + + if (common_commit is not None): + + # Now get the diff. 
Use --staged to get both diff between + # common_commit..HEAD and any locally staged files + allFiles = __gitdiff("--name-only", + "--ignore-submodules", + "--staged", + f"{common_commit}").splitlines() + else: + # Fallback to just uncommitted files + allFiles = uncommittedFiles() + + files = [] + for f in allFiles: + if pathFilter is None or pathFilter(f): + files.append(f) + + filesToCheckString = "\n\t".join(files) if files else "" + print(f" [DEBUG] Found files to check:\n\t{filesToCheckString}\n") + return files + + +def listAllFilesInDir(folder): + """Utility function to list all files/subdirs in the input folder""" + allFiles = [] + for root, dirs, files in os.walk(folder): + for name in files: + allFiles.append(os.path.join(root, name)) + return allFiles + + +def listFilesToCheck(filesDirs, filter=None): + """ + Utility function to filter the input list of files/dirs based on the input + filter method and returns all the files that need to be checked + """ + allFiles = [] + for f in filesDirs: + if os.path.isfile(f): + if filter is None or filter(f): + allFiles.append(f) + elif os.path.isdir(f): + files = listAllFilesInDir(f) + for f_ in files: + if filter is None or filter(f_): + allFiles.append(f_) + return allFiles diff --git a/cpp/src/community/legacy/spectral_clustering.cu b/cpp/src/community/legacy/spectral_clustering.cu index e6d026ad22a..85d331c258e 100644 --- a/cpp/src/community/legacy/spectral_clustering.cu +++ b/cpp/src/community/legacy/spectral_clustering.cu @@ -30,8 +30,8 @@ #include #include -#include -#include +#include +#include namespace cugraph { diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu index 85ccbcad3bc..64d7331b004 100644 --- a/cpp/src/detail/utility_wrappers.cu +++ b/cpp/src/detail/utility_wrappers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index dc7c3bdc38c..ac275f47a7d 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp index 40503b0dd2e..9061866d692 100644 --- a/cpp/src/sampling/rw_traversals.hpp +++ b/cpp/src/sampling/rw_traversals.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ #include #include -#include +#include #include diff --git a/cpp/src/visitors/graph_envelope.cpp b/cpp/src/visitors/graph_envelope.cpp old mode 100755 new mode 100644 index 927c5060b1e..a93119124e5 --- a/cpp/src/visitors/graph_envelope.cpp +++ b/cpp/src/visitors/graph_envelope.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/sampling/random_walks_profiling.cu b/cpp/tests/sampling/random_walks_profiling.cu index 455fba2876a..3bc107230b9 100644 --- a/cpp/tests/sampling/random_walks_profiling.cu +++ b/cpp/tests/sampling/random_walks_profiling.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,7 +22,7 @@ #include #include -#include +#include #include diff --git a/cpp/tests/sampling/random_walks_test.cu b/cpp/tests/sampling/random_walks_test.cu index 33fd67cfc44..dd2fc3c9857 100644 --- a/cpp/tests/sampling/random_walks_test.cu +++ b/cpp/tests/sampling/random_walks_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ #include #include -#include +#include #include "random_walks_utils.cuh" diff --git a/cpp/tests/sampling/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu index a0a7b2b1d79..c178df17272 100644 --- a/cpp/tests/sampling/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ #include #include -#include +#include #include "random_walks_utils.cuh"