diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 0d8e6d653428..78a17d3f705a 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -65,7 +65,7 @@ jobs: run: | cd python-package python --version - python setup.py sdist + python -m build --sdist pip install -v ./dist/xgboost-*.tar.gz cd .. python -c 'import xgboost' @@ -92,6 +92,9 @@ jobs: auto-update-conda: true python-version: ${{ matrix.python-version }} activate-environment: test + - name: Install build + run: | + conda install -c conda-forge python-build - name: Display Conda env run: | conda info @@ -100,7 +103,7 @@ jobs: run: | cd python-package python --version - python setup.py sdist + python -m build --sdist pip install -v ./dist/xgboost-*.tar.gz cd .. python -c 'import xgboost' @@ -147,7 +150,7 @@ jobs: run: | cd python-package python --version - python setup.py install + pip install -v . - name: Test Python package run: | @@ -194,7 +197,7 @@ jobs: run: | cd python-package python --version - python setup.py bdist_wheel --universal + pip wheel -v . --wheel-dir dist/ pip install ./dist/*.whl - name: Test Python package @@ -238,7 +241,7 @@ jobs: run: | cd python-package python --version - python setup.py install + pip install -v . - name: Test Python package run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index e5a61c60b082..7953a10dd990 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,6 +47,7 @@ option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF) set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header") option(RABIT_MOCK "Build rabit with mock" OFF) option(HIDE_CXX_SYMBOLS "Build shared library and hide all C++ symbols" OFF) +option(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR "Output build artifacts in CMake binary dir" OFF) ## CUDA option(USE_CUDA "Build with GPU acceleration" OFF) option(USE_NCCL "Build with NCCL to enable distributed GPU support." 
OFF) @@ -268,8 +269,13 @@ if (JVM_BINDINGS) xgboost_target_defs(xgboost4j) endif (JVM_BINDINGS) -set_output_directory(runxgboost ${xgboost_SOURCE_DIR}) -set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib) +if (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR) + set_output_directory(runxgboost ${xgboost_BINARY_DIR}) + set_output_directory(xgboost ${xgboost_BINARY_DIR}/lib) +else () + set_output_directory(runxgboost ${xgboost_SOURCE_DIR}) + set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib) +endif () # Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names add_dependencies(xgboost runxgboost) diff --git a/dev/release-artifacts.py b/dev/release-artifacts.py index 18c317a915e4..eab64ff0c9a3 100644 --- a/dev/release-artifacts.py +++ b/dev/release-artifacts.py @@ -105,7 +105,7 @@ def make_pysrc_wheel(release: str, outdir: str) -> None: os.mkdir(dist) with DirectoryExcursion(os.path.join(ROOT, "python-package")): - subprocess.check_call(["python", "setup.py", "sdist"]) + subprocess.check_call(["python", "-m", "build", "--sdist"]) src = os.path.join(DIST, f"xgboost-{release}.tar.gz") subprocess.check_call(["twine", "check", src]) shutil.move(src, os.path.join(dist, f"xgboost-{release}.tar.gz")) diff --git a/doc/build.rst b/doc/build.rst index 53d9a3209dc4..e78d2d2f464e 100644 --- a/doc/build.rst +++ b/doc/build.rst @@ -12,6 +12,7 @@ systems. If the instructions do not work for you, please feel free to ask quest Consider installing XGBoost from a pre-built binary, to avoid the trouble of building XGBoost from the source. Checkout :doc:`Installation Guide `. .. contents:: Contents + :local: .. _get_source: @@ -152,11 +153,11 @@ On Windows, run CMake as follows: mkdir build cd build - cmake .. -G"Visual Studio 14 2015 Win64" -DUSE_CUDA=ON + cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON (Change the ``-G`` option appropriately if you have a different version of Visual Studio installed.) 
-The above cmake configuration run will create an ``xgboost.sln`` solution file in the build directory. Build this solution in release mode as a x64 build, either from Visual studio or from command line: +The above cmake configuration run will create an ``xgboost.sln`` solution file in the build directory. Build this solution in Release mode, either from Visual studio or from command line: .. code-block:: bash @@ -176,110 +177,103 @@ Building Python Package with Default Toolchains =============================================== There are several ways to build and install the package from source: -1. Use Python setuptools directly +1. Build C++ core with CMake first - The XGBoost Python package supports most of the setuptools commands, here is a list of tested commands: + You can first build C++ library using CMake as described in :ref:`build_shared_lib`. + After compilation, a shared library will appear in ``lib/`` directory. + On Linux distributions, the shared library is ``lib/libxgboost.so``. + The install script ``pip install .`` will reuse the shared library instead of compiling + it from scratch, making it quite fast to run. - .. code-block:: bash - - python setup.py install # Install the XGBoost to your current Python environment. - python setup.py build # Build the Python package. - python setup.py build_ext # Build only the C++ core. - python setup.py sdist # Create a source distribution - python setup.py bdist # Create a binary distribution - python setup.py bdist_wheel # Create a binary distribution with wheel format - - Running ``python setup.py install`` will compile XGBoost using default CMake flags. For - passing additional compilation options, append the flags to the command. For example, - to enable CUDA acceleration and NCCL (distributed GPU) support: - - .. code-block:: bash + .. code-block:: console - python setup.py install --use-cuda --use-nccl + $ cd python-package/ + $ pip install . 
# Will re-use lib/libxgboost.so - Please refer to ``setup.py`` for a complete list of available options. Some other - options used for development are only available for using CMake directly. See next - section on how to use CMake with setuptools manually. +2. Install the Python package directly - You can install the created distribution packages using pip. For example, after running - ``sdist`` setuptools command, a tar ball similar to ``xgboost-1.0.0.tar.gz`` will be - created under the ``dist`` directory. Then you can install it by invoking the following - command under ``dist`` directory: + You can navigate to ``python-package/`` directory and install the Python package directly + by running - .. code-block:: bash + .. code-block:: console - # under python-package directory - cd dist - pip install ./xgboost-1.0.0.tar.gz + $ cd python-package/ + $ pip install -v . + which will compile XGBoost's native (C++) code using default CMake flags. + To enable additional compilation options, pass corresponding ``--config-settings``: - For details about these commands, please refer to the official document of `setuptools - `_, or just Google "how to install Python - package from source". XGBoost Python package follows the general convention. - Setuptools is usually available with your Python distribution, if not you can install it - via system command. For example on Debian or Ubuntu: + .. code-block:: console - .. code-block:: bash + $ pip install -v . --config-settings use_cuda=True --config-settings use_nccl=True - sudo apt-get install python-setuptools + Use Pip 22.1 or later to use ``--config-settings`` option. + Here are the available options for ``--config-settings``: - For cleaning up the directory after running above commands, ``python setup.py clean`` is - not sufficient. After copying out the build result, simply running ``git clean -xdf`` - under ``python-package`` is an efficient way to remove generated cache files. 
If you - find weird behaviors in Python build or running linter, it might be caused by those - cached files. + .. literalinclude:: ../python-package/packager/build_config.py + :language: python + :start-at: @dataclasses.dataclass + :end-before: def _set_config_setting( - For using develop command (editable installation), see next section. + ``use_system_libxgboost`` is a special option. See Item 4 below for + detailed description. - .. code-block:: + .. note:: Verbose flag recommended - python setup.py develop # Create a editable installation. - pip install -e . # Same as above, but carried out by pip. + As ``pip install .`` will build C++ code, it will take a while to complete. + To ensure that the build is progressing successfully, we suggest that + you add the verbose flag (``-v``) when invoking ``pip install``. -2. Build C++ core with CMake first +3. Editable installation - This is mostly for C++ developers who don't want to go through the hooks in Python - setuptools. You can build C++ library directly using CMake as described in above - sections. After compilation, a shared object (or called dynamic linked library, jargon - depending on your platform) will appear in XGBoost's source tree under ``lib/`` - directory. On Linux distributions it's ``lib/libxgboost.so``. From there all Python - setuptools commands will reuse that shared object instead of compiling it again. This - is especially convenient if you are using the editable installation, where the installed - package is simply a link to the source tree. We can perform rapid testing during - development. Here is a simple bash script does that: + To further enable rapid development and iteration, we provide an **editable installation**. + In an editable installation, the installed package is simply a symbolic link to your + working copy of the XGBoost source code. So every change you make to your source + directory will be immediately visible to the Python interpreter. 
Here is how to + install XGBoost as editable installation: .. code-block:: bash - # Under xgboost source tree. + # Under xgboost source directory mkdir build cd build - cmake .. - make -j$(nproc) + # Build shared library libxgboost.so + cmake .. -GNinja + ninja + # Install as editable installation cd ../python-package - pip install -e . # or equivalently python setup.py develop + pip install -e . + +4. Use ``libxgboost.so`` on system path. -3. Use ``libxgboost.so`` on system path. + This option is useful for package managers that wish to separately package + ``libxgboost.so`` and the XGBoost Python package. For example, Conda + publishes ``libxgboost`` (for the shared library) and ``py-xgboost`` + (for the Python package). - This is for distributing xgboost in a language independent manner, where - ``libxgboost.so`` is separately packaged with Python package. Assuming `libxgboost.so` - is already presented in system library path, which can be queried via: + To use this option, first make sure that ``libxgboost.so`` exists in the system library path: .. code-block:: python import sys - import os - os.path.join(sys.prefix, 'lib') + import pathlib + libpath = pathlib.Path(sys.prefix).joinpath("lib", "libxgboost.so") + assert libpath.exists() - Then one only needs to provide an user option when installing Python package to reuse the - shared object in system path: + Then pass ``use_system_libxgboost=True`` option to ``pip install``: .. code-block:: bash - cd xgboost/python-package - python setup.py install --use-system-libxgboost + cd python-package + pip install . --config-settings use_system_libxgboost=True + + +.. note:: + See :doc:`contrib/python_packaging` for instructions on packaging + and distributing XGBoost as Python distributions. .. _python_mingw: @@ -297,7 +291,7 @@ So you may want to build XGBoost with GCC own your own risk. This presents some 2. ``-O3`` is OK. 3. ``-mtune=native`` is also OK. 4. Don't use ``-march=native`` gcc flag. 
Using it causes the Python interpreter to crash if the DLL was actually used. -5. You may need to provide the lib with the runtime libs. If ``mingw32/bin`` is not in ``PATH``, build a wheel (``python setup.py bdist_wheel``), open it with an archiver and put the needed dlls to the directory where ``xgboost.dll`` is situated. Then you can install the wheel with ``pip``. +5. You may need to provide the lib with the runtime libs. If ``mingw32/bin`` is not in ``PATH``, build a wheel (``pip wheel``), open it with an archiver and put the needed dlls to the directory where ``xgboost.dll`` is situated. Then you can install the wheel with ``pip``. ****************************** Building R Package From Source diff --git a/doc/contrib/ci.rst b/doc/contrib/ci.rst index 6073e646ad99..76e06de352fa 100644 --- a/doc/contrib/ci.rst +++ b/doc/contrib/ci.rst @@ -35,8 +35,9 @@ calls ``cibuildwheel`` to build the wheel. The ``cibuildwheel`` is a library tha suitable Python environment for each OS and processor target. Since we don't have Apple Silion machine in GitHub Actions, cross-compilation is needed; ``cibuildwheel`` takes care of the complex task of cross-compiling a Python wheel. (Note that ``cibuildwheel`` will call -``setup.py bdist_wheel``. Since XGBoost has a native library component, ``setup.py`` contains -a glue code to call CMake and a C++ compiler to build the native library on the fly.) +``pip wheel``. Since XGBoost has a native library component, we created a customized build +backend that hooks into ``pip``. The customized backend contains the glue code to compile the native +library on the fly.) 
********************************************************* Reproduce CI testing environments using Docker containers diff --git a/doc/contrib/index.rst b/doc/contrib/index.rst index c9c5f93a2e8b..6a36cb1086c1 100644 --- a/doc/contrib/index.rst +++ b/doc/contrib/index.rst @@ -23,6 +23,7 @@ Here are guidelines for contributing to various aspect of the XGBoost project: Community Guideline donate coding_guide + python_packaging unit_tests Docs and Examples git_guide diff --git a/doc/contrib/python_packaging.rst b/doc/contrib/python_packaging.rst new file mode 100644 index 000000000000..5cf0856851d9 --- /dev/null +++ b/doc/contrib/python_packaging.rst @@ -0,0 +1,83 @@ +########################################### +Notes on packaging XGBoost's Python package +########################################### + + +.. contents:: Contents + :local: + +.. _packaging_python_xgboost: + +*************************************************** +How to build binary wheels and source distributions +*************************************************** + +Wheels and source distributions (sdist for short) are the two main +mechanisms for packaging and distributing Python packages. + +* A **source distribution** (sdist) is a tarball (``.tar.gz`` extension) that + contains the source code. +* A **wheel** is a ZIP-compressed archive (with ``.whl`` extension) + representing a *built* distribution. Unlike an sdist, a wheel can contain + compiled components. The compiled components are compiled prior to distribution, + making it more convenient for end-users to install a wheel. Wheels containing + compiled components are referred to as **binary wheels**. + +See `Python Packaging User Guide `_ +to learn more about how Python packages in general are packaged and +distributed. + +For the remainder of this document, we will focus on packaging and +distributing XGBoost. 
+ +Building sdists +=============== + +In the case of XGBoost, an sdist contains both the Python code as well as +the C++ code, so that the core part of XGBoost can be compiled into the +shared library ``libxgboost.so`` [#shared_lib_name]_. + +You can obtain an sdist as follows: + +.. code-block:: console + + $ python -m build --sdist . + +(You'll need to install the ``build`` package first: +``pip install build`` or ``conda install python-build``.) + +Running ``pip install`` with an sdist will launch CMake and a C++ compiler +to compile the bundled C++ code into ``libxgboost.so``: + +.. code-block:: console + + $ pip install -v xgboost-2.0.0.tar.gz # Add -v to show build progress + +Building binary wheels +====================== + +You can also build a wheel as follows: + +.. code-block:: console + + $ pip wheel --no-deps -v . + +Notably, the resulting wheel contains a copy of the shared library +``libxgboost.so`` [#shared_lib_name]_. The wheel is a **binary wheel**, +since it contains a compiled binary. + + +Running ``pip install`` with the binary wheel will extract the content of +the wheel into the current Python environment. Since the wheel already +contains a pre-built copy of ``libxgboost.so``, it does not have to be +built at the time of install. So ``pip install`` with the binary wheel +completes quickly: + +.. code-block:: console + + $ pip install xgboost-2.0.0-py3-none-linux_x86_64.whl # Completes quickly + +.. rubric:: Footnotes + +.. [#shared_lib_name] The name of the shared library file will differ + depending on the operating system in use. See :ref:`build_shared_lib`. diff --git a/doc/install.rst b/doc/install.rst index 03daf465f605..0e155f647731 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -16,15 +16,28 @@ Stable Release Python ------ -Pre-built binary are uploaded to PyPI (Python Package Index) for each release. Supported platforms are Linux (x86_64, aarch64), Windows (x86_64) and MacOS (x86_64, Apple Silicon). 
+Pre-built binary wheels are uploaded to PyPI (Python Package Index) for each release. Supported platforms are Linux (x86_64, aarch64), Windows (x86_64) and MacOS (x86_64, Apple Silicon). .. code-block:: bash + # Pip 21.3+ is required pip install xgboost You might need to run the command with ``--user`` flag or use ``virtualenv`` if you run -into permission errors. Python pre-built binary capability for each platform: +into permission errors. + +.. note:: Windows users need to install Visual C++ Redistributable + + XGBoost requires DLLs from `Visual C++ Redistributable + `_ + in order to function, so make sure to install it. Exception: If + you have Visual Studio installed, you already have access to + necessary libraries and thus don't need to install Visual C++ + Redistributable. + + +Capabilities of binary wheels for each platform: .. |tick| unicode:: U+2714 .. |cross| unicode:: U+2718 diff --git a/plugin/federated/README.md b/plugin/federated/README.md index d83db6be1ee2..631c44cee26f 100644 --- a/plugin/federated/README.md +++ b/plugin/federated/README.md @@ -19,7 +19,7 @@ cmake .. -GNinja \ -DUSE_NCCL=ON ninja cd ../python-package -pip install -e . # or equivalently python setup.py develop +pip install -e . ``` If CMake fails to locate gRPC, you may need to pass `-DCMAKE_PREFIX_PATH=` to CMake. 
diff --git a/python-package/MANIFEST.in b/python-package/MANIFEST.in deleted file mode 100644 index 23f2684c2ac4..000000000000 --- a/python-package/MANIFEST.in +++ /dev/null @@ -1,56 +0,0 @@ -include README.rst -include xgboost/LICENSE -include xgboost/VERSION -include xgboost/CMakeLists.txt - -include xgboost/py.typed -recursive-include xgboost *.py -recursive-include xgboost/cmake * -exclude xgboost/cmake/RPackageInstall.cmake.in -exclude xgboost/cmake/RPackageInstallTargetSetup.cmake -exclude xgboost/cmake/Sanitizer.cmake -exclude xgboost/cmake/modules/FindASan.cmake -exclude xgboost/cmake/modules/FindLSan.cmake -exclude xgboost/cmake/modules/FindLibR.cmake -exclude xgboost/cmake/modules/FindTSan.cmake -exclude xgboost/cmake/modules/FindUBSan.cmake -recursive-include xgboost/include * -recursive-include xgboost/plugin * -recursive-include xgboost/src * - -recursive-include xgboost/gputreeshap/GPUTreeShap * - -include xgboost/rabit/CMakeLists.txt -recursive-include xgboost/rabit/include * -recursive-include xgboost/rabit/src * -prune xgboost/rabit/doc -prune xgboost/rabit/guide - -include xgboost/dmlc-core/CMakeLists.txt - -recursive-include xgboost/dmlc-core/cmake * -exclude xgboost/dmlc-core/cmake/gtest_cmake.in -exclude xgboost/dmlc-core/cmake/lint.cmake -exclude xgboost/dmlc-core/cmake/Sanitizer.cmake -exclude xgboost/dmlc-core/cmake/Modules/FindASan.cmake -exclude xgboost/dmlc-core/cmake/Modules/FindLSan.cmake -exclude xgboost/dmlc-core/cmake/Modules/FindTSan.cmake -exclude xgboost/dmlc-core/cmake/Modules/FindUBSan.cmake - -recursive-include xgboost/dmlc-core/include * -recursive-include xgboost/dmlc-core/include * -recursive-include xgboost/dmlc-core/make * -recursive-include xgboost/dmlc-core/src * -include xgboost/dmlc-core/tracker/dmlc-submit -recursive-include xgboost/dmlc-core/tracker/dmlc_tracker *.py -include xgboost/dmlc-core/tracker/yarn/build.bat -include xgboost/dmlc-core/tracker/yarn/build.sh -include xgboost/dmlc-core/tracker/yarn/pom.xml 
@dataclasses.dataclass
class BuildConfiguration:  # pylint: disable=R0902
    """Configurations used when building libxgboost"""

    # Whether to hide C++ symbols in libxgboost.so
    hide_cxx_symbols: bool = True
    # Whether to enable OpenMP
    use_openmp: bool = True
    # Whether to enable CUDA
    use_cuda: bool = False
    # Whether to enable NCCL
    use_nccl: bool = False
    # Whether to enable HDFS
    use_hdfs: bool = False
    # Whether to enable Azure Storage
    use_azure: bool = False
    # Whether to enable AWS S3
    use_s3: bool = False
    # Whether to enable the dense parser plugin
    plugin_dense_parser: bool = False
    # Special option: See explanation below
    use_system_libxgboost: bool = False

    def _set_config_setting(
        self, config_settings: Dict[str, Any], field_name: str
    ) -> None:
        """Set the field ``field_name`` from its entry in ``config_settings``.

        The value is interpreted as a boolean: ``"true"``, ``"1"`` and ``"on"``
        (case-insensitive) mean True, anything else means False.

        Raises
        ------
        ValueError
            If ``field_name`` is not a field of this dataclass, or has no entry
            in ``config_settings``.
        """
        valid_fields = {x.name for x in dataclasses.fields(self)}
        if field_name not in valid_fields or field_name not in config_settings:
            raise ValueError(f"Field {field_name} is not a valid config_settings")
        setattr(
            self,
            field_name,
            (config_settings[field_name].lower() in ["true", "1", "on"]),
        )

    def update(self, config_settings: Optional[Dict[str, Any]]) -> None:
        """Parse config_settings from Pip (or other PEP 517 frontend)

        Only the options present in ``config_settings`` are changed; every
        other field keeps its current value. ``None`` and an empty dict are
        both no-ops.

        Raises
        ------
        ValueError
            If ``config_settings`` contains a key that is not a known option.
        """
        if config_settings is not None:
            # Iterate over what the user supplied, not over all fields:
            # a frontend passes only the options given on the command line, so
            # requiring every field to be present would reject any real input.
            for field_name in config_settings:
                self._set_config_setting(config_settings, field_name)

    def get_cmake_args(self) -> List[str]:
        """Convert build configuration to CMake args

        Each field becomes ``-D<FIELD_NAME_UPPERCASE>=ON`` or ``=OFF``.
        ``use_system_libxgboost`` is skipped because it is not turned into a
        CMake flag.
        """
        cmake_args = []
        for field_name in [x.name for x in dataclasses.fields(self)]:
            if field_name in ["use_system_libxgboost"]:
                continue
            cmake_option = field_name.upper()
            cmake_value = "ON" if getattr(self, field_name) is True else "OFF"
            cmake_args.append(f"-D{cmake_option}={cmake_value}")
        return cmake_args
def build_libxgboost(
    cpp_src_dir: pathlib.Path,
    build_dir: pathlib.Path,
    build_config: BuildConfiguration,
) -> pathlib.Path:
    """Build libxgboost in a temporary directory and obtain the path to built libxgboost

    Parameters
    ----------
    cpp_src_dir :
        Directory holding the C++ sources; handed to CMake as the source directory.
    build_dir :
        Directory in which CMake and the build tool are run (used as ``cwd``).
    build_config :
        Build options, converted to CMake flags with ``get_cmake_args()``.
        On non-Windows platforms ``use_openmp`` is set to False **in place**
        when the first build attempt fails and the build is retried.

    Returns
    -------
    ``build_dir / "lib" / <platform library name>``

    Raises
    ------
    RuntimeError
        If ``cpp_src_dir`` is not a directory, or (Windows) if none of the
        supported generators produced a successful build.
    subprocess.CalledProcessError
        (non-Windows) If the retry without OpenMP fails as well.
    """
    logger = logging.getLogger("xgboost.packager.build_libxgboost")

    if not cpp_src_dir.is_dir():
        raise RuntimeError(f"Expected {cpp_src_dir} to be a directory")
    logger.info(
        "Building %s from the C++ source files in %s...", _lib_name(), str(cpp_src_dir)
    )

    def _build(*, generator: str, build_tool: Optional[str] = None) -> None:
        """Configure with CMake using ``generator``, then compile.

        ``build_tool`` is the executable invoked to compile on non-Windows
        platforms; it is unused on Windows, where ``cmake --build`` is used.
        """
        cmake_cmd = [
            "cmake",
            str(cpp_src_dir),
            generator,
            "-DKEEP_BUILD_ARTIFACTS_IN_BINARY_DIR=ON",
        ]
        cmake_cmd.extend(build_config.get_cmake_args())

        # Flag for cross-compiling for Apple Silicon
        # We use environment variable because it's the only way to pass down custom flags
        # through the cibuildwheel package, which calls `pip wheel` command.
        if "CIBW_TARGET_OSX_ARM64" in os.environ:
            cmake_cmd.append("-DCMAKE_OSX_ARCHITECTURES=arm64")

        logger.info("CMake args: %s", str(cmake_cmd))
        subprocess.check_call(cmake_cmd, cwd=build_dir)

        if system() == "Windows":
            subprocess.check_call(
                ["cmake", "--build", ".", "--config", "Release"], cwd=build_dir
            )
        else:
            if build_tool is None:
                raise RuntimeError("build_tool is required on non-Windows platforms")
            # os.cpu_count() may return None; fall back to a single job rather
            # than passing the literal "-jNone" to the build tool.
            nproc = os.cpu_count() or 1
            subprocess.check_call([build_tool, f"-j{nproc}"], cwd=build_dir)

    if system() == "Windows":
        supported_generators = (
            "-GVisual Studio 17 2022",
            "-GVisual Studio 16 2019",
            "-GVisual Studio 15 2017",
            "-GMinGW Makefiles",
        )
        # Try each generator in order and stop at the first that succeeds.
        for generator in supported_generators:
            try:
                _build(generator=generator)
                logger.info(
                    "Successfully built %s using generator %s", _lib_name(), generator
                )
                break
            except subprocess.CalledProcessError as e:
                logger.info(
                    "Tried building with generator %s but failed with exception %s",
                    generator,
                    str(e),
                )
                # Empty build directory
                shutil.rmtree(build_dir)
                build_dir.mkdir()
        else:
            # Reached only when the loop finished without a successful build.
            raise RuntimeError(
                "None of the supported generators produced a successful build! "
                f"Supported generators: {supported_generators}"
            )
    else:
        build_tool = "ninja" if shutil.which("ninja") else "make"
        generator = "-GNinja" if build_tool == "ninja" else "-GUnix Makefiles"
        try:
            _build(generator=generator, build_tool=build_tool)
        except subprocess.CalledProcessError as e:
            # Any failure of the first attempt triggers one retry with OpenMP off.
            logger.info("Failed to build with OpenMP. Exception: %s", str(e))
            build_config.use_openmp = False
            _build(generator=generator, build_tool=build_tool)

    return build_dir / "lib" / _lib_name()
def locate_or_build_libxgboost(
    toplevel_dir: pathlib.Path,
    build_dir: pathlib.Path,
    build_config: BuildConfiguration,
) -> pathlib.Path:
    """Return a path to libxgboost, compiling it only if no existing copy is used.

    Lookup order:

    1. A library found by ``locate_local_libxgboost(toplevel_dir)``.
    2. If ``build_config.use_system_libxgboost`` is set, the library under
       ``<sys.prefix>/lib``; a ``RuntimeError`` is raised if it is absent.
    3. Otherwise, build from ``toplevel_dir / "cpp_src"`` when that exists,
       else from the parent of ``toplevel_dir``.
    """
    log = logging.getLogger("xgboost.packager.locate_or_build_libxgboost")

    local_lib = locate_local_libxgboost(toplevel_dir, logger=log)
    if local_lib is not None:
        return local_lib

    if build_config.use_system_libxgboost:
        # Look under the prefix of the running interpreter
        system_lib = pathlib.Path(sys.prefix).absolute().resolve() / "lib" / _lib_name()
        if system_lib.exists():
            log.info("Using system XGBoost: %s", str(system_lib))
            return system_lib
        raise RuntimeError(
            f"use_system_libxgboost was specified but {_lib_name()} is "
            f"not found in {system_lib.parent}"
        )

    bundled_src = toplevel_dir.joinpath("cpp_src")
    if bundled_src.exists():
        # Source distribution; all C++ source files to be found in cpp_src/
        cpp_src_dir = bundled_src
    else:
        # Probably running "pip install ." from python-package/
        # NOTE(review): the original indentation is not visible here; the
        # CMakeLists.txt check is assumed to guard only this fallback - confirm.
        cpp_src_dir = toplevel_dir.parent
        if not cpp_src_dir.joinpath("CMakeLists.txt").exists():
            raise RuntimeError(f"Did not find CMakeLists.txt from {cpp_src_dir}")
    return build_libxgboost(cpp_src_dir, build_dir=build_dir, build_config=build_config)
@contextmanager
def cd(path: Union[str, pathlib.Path]) -> Iterator[str]:  # pylint: disable=C0103
    """
    Run the body of the ``with`` statement inside ``path``, then go back.

    Yields the resolved (``os.path.realpath``) target directory as a string.
    The previous working directory is restored even if the body raises.
    TODO(hcho3): Remove this once we adopt Python 3.11, which implements contextlib.chdir.
    """
    target_dir = os.path.realpath(str(path))
    previous_dir = os.getcwd()
    os.chdir(target_dir)
    try:
        yield target_dir
    finally:
        os.chdir(previous_dir)
def build_sdist(
    sdist_directory: str,
    config_settings: Optional[Dict[str, Any]] = None,
) -> str:
    """Build a source distribution

    The Python package, the ``packager`` backend and the C++ source tree are
    copied into a temporary workspace, and Hatchling builds the sdist from there.

    Parameters
    ----------
    sdist_directory :
        Directory in which the sdist is placed. A relative path is interpreted
        relative to the working directory at the time of the call.
    config_settings :
        Must be empty or None; options are not supported for sdists.

    Returns
    -------
    The value returned by ``hatchling.build.build_sdist``.

    Raises
    ------
    NotImplementedError
        If ``config_settings`` is non-empty.
    RuntimeError
        If ``CMakeLists.txt`` is not found in the parent of the package directory.
    """
    logger = logging.getLogger("xgboost.packager.build_sdist")

    if config_settings:
        raise NotImplementedError(
            "XGBoost's custom build backend doesn't support config_settings option "
            f"when building sdist. {config_settings=}"
        )

    cpp_src_dir = TOPLEVEL_DIR.parent
    if not cpp_src_dir.joinpath("CMakeLists.txt").exists():
        raise RuntimeError(f"Did not find CMakeLists.txt from {cpp_src_dir}")

    # The working directory is changed to the temporary workspace below. Pin
    # the output directory to an absolute path first; otherwise a relative
    # path would point inside the workspace, which is deleted on exit.
    sdist_directory = os.path.abspath(sdist_directory)

    # Create tempdir with Python package + C++ sources
    with tempfile.TemporaryDirectory() as td:
        td_path = pathlib.Path(td)

        workspace = td_path / "sdist_workspace"
        workspace.mkdir()
        logger.info("Copying project files to temporary directory %s", str(workspace))

        copy_with_logging(TOPLEVEL_DIR / "pyproject.toml", workspace, logger=logger)
        copy_with_logging(TOPLEVEL_DIR / "hatch_build.py", workspace, logger=logger)
        copy_with_logging(TOPLEVEL_DIR / "README.rst", workspace, logger=logger)

        copytree_with_logging(
            TOPLEVEL_DIR / "xgboost", workspace / "xgboost", logger=logger
        )
        # The backend itself is shipped so that the sdist can be built later.
        copytree_with_logging(
            TOPLEVEL_DIR / "packager", workspace / "packager", logger=logger
        )

        temp_cpp_src_dir = workspace / "cpp_src"
        copy_cpp_src_tree(cpp_src_dir, target_dir=temp_cpp_src_dir, logger=logger)

        with cd(workspace):
            sdist_name = hatchling.build.build_sdist(sdist_directory, config_settings)
    return sdist_name
+""" +Utility functions for implementing PEP 517 backend +""" +import logging +import pathlib +import shutil + + +def copytree_with_logging( + src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger +) -> None: + """Call shutil.copytree() with logging""" + logger.info("Copying %s -> %s", str(src), str(dest)) + shutil.copytree(src, dest) + + +def copy_with_logging( + src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger +) -> None: + """Call shutil.copy() with logging""" + if dest.is_dir(): + logger.info("Copying %s -> %s", str(src), str(dest / src.name)) + else: + logger.info("Copying %s -> %s", str(src), str(dest)) + shutil.copy(src, dest) diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml new file mode 100644 index 000000000000..8f120df5dcd0 --- /dev/null +++ b/python-package/pyproject.toml @@ -0,0 +1,42 @@ +[build-system] +requires = [ + "hatchling>=1.12.1" +] +backend-path = ["."] +build-backend = "packager.pep517" + +[project] +name = "xgboost" +version = "2.0.0-dev" +authors = [ + {name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"}, + {name = "Jiaming Yuan", email = "jm.yuan@outlook.com"} +] +description = "XGBoost Python Package" +readme = {file = "README.rst", content-type = "text/x-rst"} +requires-python = ">=3.8" +license = {text = "Apache-2.0"} +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10" +] +dependencies = [ + "numpy", + "scipy" +] + +[project.optional-dependencies] +pandas = ["pandas"] +scikit-learn = ["scikit-learn"] +dask = ["dask", "pandas", "distributed"] +datatable = ["datatable"] +plotting = ["graphviz", "matplotlib"] +pyspark = ["pyspark", "scikit-learn", "cloudpickle"] + 
+[tool.hatch.build.targets.wheel.hooks.custom] diff --git a/python-package/setup.py b/python-package/setup.py deleted file mode 100644 index fe1cbf2e9c19..000000000000 --- a/python-package/setup.py +++ /dev/null @@ -1,399 +0,0 @@ -"""Setup xgboost package.""" -import logging -import os -import shutil -import subprocess -import sys -from platform import system -from typing import List, Optional - -from setuptools import Extension, find_packages, setup -from setuptools.command import build_ext, install, install_lib, sdist - -# You can't use `pip install .` as pip copies setup.py to a temporary -# directory, parent directory is no longer reachable (isolated build) . -CURRENT_DIR = os.path.abspath(os.path.dirname(__file__)) -sys.path.insert(0, CURRENT_DIR) - -# Options only effect `python setup.py install`, building `bdist_wheel` -# requires using CMake directly. -USER_OPTIONS = { - # libxgboost options. - "use-openmp": (None, "Build with OpenMP support.", 1), - "use-cuda": (None, "Build with GPU acceleration.", 0), - "use-nccl": (None, "Build with NCCL to enable distributed GPU support.", 0), - "build-with-shared-nccl": (None, "Build with shared NCCL library.", 0), - "hide-cxx-symbols": (None, "Hide all C++ symbols during build.", 1), - "use-hdfs": (None, "Build with HDFS support", 0), - "use-azure": (None, "Build with AZURE support.", 0), - "use-s3": (None, "Build with S3 support", 0), - "plugin-dense-parser": (None, "Build dense parser plugin.", 0), - # Python specific - "use-system-libxgboost": (None, "Use libxgboost.so in system path.", 0), -} - -NEED_CLEAN_TREE = set() -NEED_CLEAN_FILE = set() -BUILD_TEMP_DIR = None - - -def lib_name() -> str: - """Return platform dependent shared object name.""" - if system() == "Linux" or system().upper().endswith("BSD"): - name = "libxgboost.so" - elif system() == "Darwin": - name = "libxgboost.dylib" - elif system() == "Windows": - name = "xgboost.dll" - elif system() == "OS400": - name = "libxgboost.so" - return name - - 
-def copy_tree(src_dir: str, target_dir: str) -> None: - """Copy source tree into build directory.""" - - def clean_copy_tree(src: str, dst: str) -> None: - shutil.copytree(src, dst) - NEED_CLEAN_TREE.add(os.path.abspath(dst)) - - def clean_copy_file(src: str, dst: str) -> None: - shutil.copy(src, dst) - NEED_CLEAN_FILE.add(os.path.abspath(dst)) - - src = os.path.join(src_dir, "src") - inc = os.path.join(src_dir, "include") - dmlc_core = os.path.join(src_dir, "dmlc-core") - gputreeshap = os.path.join(src_dir, "gputreeshap") - rabit = os.path.join(src_dir, "rabit") - cmake = os.path.join(src_dir, "cmake") - plugin = os.path.join(src_dir, "plugin") - - clean_copy_tree(src, os.path.join(target_dir, "src")) - clean_copy_tree(inc, os.path.join(target_dir, "include")) - clean_copy_tree(dmlc_core, os.path.join(target_dir, "dmlc-core")) - clean_copy_tree(gputreeshap, os.path.join(target_dir, "gputreeshap")) - clean_copy_tree(rabit, os.path.join(target_dir, "rabit")) - clean_copy_tree(cmake, os.path.join(target_dir, "cmake")) - clean_copy_tree(plugin, os.path.join(target_dir, "plugin")) - - cmake_list = os.path.join(src_dir, "CMakeLists.txt") - clean_copy_file(cmake_list, os.path.join(target_dir, "CMakeLists.txt")) - lic = os.path.join(src_dir, "LICENSE") - clean_copy_file(lic, os.path.join(target_dir, "LICENSE")) - - -def clean_up() -> None: - """Removed copied files.""" - for path in NEED_CLEAN_TREE: - shutil.rmtree(path) - for path in NEED_CLEAN_FILE: - os.remove(path) - - -class CMakeExtension(Extension): # pylint: disable=too-few-public-methods - """Wrapper for extension""" - - def __init__(self, name: str) -> None: - super().__init__(name=name, sources=[]) - - -class BuildExt(build_ext.build_ext): # pylint: disable=too-many-ancestors - """Custom build_ext command using CMake.""" - - logger = logging.getLogger("XGBoost build_ext") - - # pylint: disable=too-many-arguments - def build( - self, - src_dir: str, - build_dir: str, - generator: str, - build_tool: 
Optional[str] = None, - use_omp: int = 1, - ) -> None: - """Build the core library with CMake.""" - cmake_cmd = ["cmake", src_dir, generator] - - for k, v in USER_OPTIONS.items(): - arg = k.replace("-", "_").upper() - value = str(v[2]) - if arg == "USE_SYSTEM_LIBXGBOOST": - continue - if arg == "USE_OPENMP" and use_omp == 0: - cmake_cmd.append("-D" + arg + "=0") - continue - cmake_cmd.append("-D" + arg + "=" + value) - - # Flag for cross-compiling for Apple Silicon - # We use environment variable because it's the only way to pass down custom flags - # through the cibuildwheel package, which otherwise calls `python setup.py bdist_wheel` - # command. - if "CIBW_TARGET_OSX_ARM64" in os.environ: - cmake_cmd.append("-DCMAKE_OSX_ARCHITECTURES=arm64") - - self.logger.info("Run CMake command: %s", str(cmake_cmd)) - subprocess.check_call(cmake_cmd, cwd=build_dir) - - if system() != "Windows": - nproc = os.cpu_count() - assert build_tool is not None - subprocess.check_call([build_tool, "-j" + str(nproc)], cwd=build_dir) - else: - subprocess.check_call( - ["cmake", "--build", ".", "--config", "Release"], cwd=build_dir - ) - - def build_cmake_extension(self) -> None: - """Configure and build using CMake""" - if USER_OPTIONS["use-system-libxgboost"][2]: - self.logger.info("Using system libxgboost.") - return - - build_dir = self.build_temp - global BUILD_TEMP_DIR # pylint: disable=global-statement - BUILD_TEMP_DIR = build_dir - libxgboost = os.path.abspath( - os.path.join(CURRENT_DIR, os.path.pardir, "lib", lib_name()) - ) - - if os.path.exists(libxgboost): - self.logger.info("Found shared library, skipping build.") - return - - src_dir = "xgboost" - try: - copy_tree( - os.path.join(CURRENT_DIR, os.path.pardir), - os.path.join(self.build_temp, src_dir), - ) - except Exception: # pylint: disable=broad-except - copy_tree(src_dir, os.path.join(self.build_temp, src_dir)) - - self.logger.info("Building from source. 
%s", libxgboost) - if not os.path.exists(build_dir): - os.mkdir(build_dir) - if shutil.which("ninja"): - build_tool = "ninja" - else: - build_tool = "make" - if sys.platform.startswith("os400"): - build_tool = "make" - - if system() == "Windows": - # Pick up from LGB, just test every possible tool chain. - for vs in ( - "-GVisual Studio 17 2022", - "-GVisual Studio 16 2019", - "-GVisual Studio 15 2017", - "-GVisual Studio 14 2015", - "-GMinGW Makefiles", - ): - try: - self.build(src_dir, build_dir, vs) - self.logger.info( - "%s is used for building Windows distribution.", vs - ) - break - except subprocess.CalledProcessError: - shutil.rmtree(build_dir) - os.mkdir(build_dir) - continue - else: - gen = "-GNinja" if build_tool == "ninja" else "-GUnix Makefiles" - try: - self.build(src_dir, build_dir, gen, build_tool, use_omp=1) - except subprocess.CalledProcessError: - self.logger.warning("Disabling OpenMP support.") - self.build(src_dir, build_dir, gen, build_tool, use_omp=0) - - def build_extension(self, ext: Extension) -> None: - """Override the method for dispatching.""" - if isinstance(ext, CMakeExtension): - self.build_cmake_extension() - else: - super().build_extension(ext) - - def copy_extensions_to_source(self) -> None: - """Dummy override. Invoked during editable installation. Our binary - should available in `lib`. - - """ - if not os.path.exists( - os.path.join(CURRENT_DIR, os.path.pardir, "lib", lib_name()) - ): - raise ValueError( - "For using editable installation, please " - + "build the shared object first with CMake." 
- ) - - -class Sdist(sdist.sdist): # pylint: disable=too-many-ancestors - """Copy c++ source into Python directory.""" - - logger = logging.getLogger("xgboost sdist") - - def run(self) -> None: - copy_tree( - os.path.join(CURRENT_DIR, os.path.pardir), - os.path.join(CURRENT_DIR, "xgboost"), - ) - libxgboost = os.path.join(CURRENT_DIR, os.path.pardir, "lib", lib_name()) - if os.path.exists(libxgboost): - self.logger.warning( - "Found shared library, removing to avoid being included in source distribution." - ) - os.remove(libxgboost) - super().run() - - -class InstallLib(install_lib.install_lib): - """Copy shared object into installation directory.""" - - logger = logging.getLogger("xgboost install_lib") - - def install(self) -> List[str]: - outfiles = super().install() - - if USER_OPTIONS["use-system-libxgboost"][2] != 0: - self.logger.info("Using system libxgboost.") - lib_path = os.path.join(sys.prefix, "lib") - msg = ( - "use-system-libxgboost is specified, but " - + lib_name() - + " is not found in: " - + lib_path - ) - assert os.path.exists(os.path.join(lib_path, lib_name())), msg - return [] - - lib_dir = os.path.join(self.install_dir, "xgboost", "lib") - if not os.path.exists(lib_dir): - os.mkdir(lib_dir) - dst = os.path.join(self.install_dir, "xgboost", "lib", lib_name()) - - libxgboost_path = lib_name() - - assert BUILD_TEMP_DIR is not None - dft_lib_dir = os.path.join(CURRENT_DIR, os.path.pardir, "lib") - build_dir = os.path.join(BUILD_TEMP_DIR, "xgboost", "lib") - - if os.path.exists(os.path.join(dft_lib_dir, libxgboost_path)): - # The library is built by CMake directly - src = os.path.join(dft_lib_dir, libxgboost_path) - else: - # The library is built by setup.py - src = os.path.join(build_dir, libxgboost_path) - self.logger.info("Installing shared library: %s", src) - dst, _ = self.copy_file(src, dst) - outfiles.append(dst) - return outfiles - - -class Install(install.install): # pylint: disable=too-many-instance-attributes - """An interface to install 
command, accepting XGBoost specific - arguments. - - """ - - user_options = install.install.user_options + [ - (k, v[0], v[1]) for k, v in USER_OPTIONS.items() - ] - - def initialize_options(self) -> None: - super().initialize_options() - self.use_openmp = 1 - self.use_cuda = 0 - self.use_nccl = 0 - self.build_with_shared_nccl = 0 - self.hide_cxx_symbols = 1 - - self.use_hdfs = 0 - self.use_azure = 0 - self.use_s3 = 0 - - self.plugin_dense_parser = 0 - - self.use_system_libxgboost = 0 - - def run(self) -> None: - # setuptools will configure the options according to user supplied command line - # arguments, then here we propagate them into `USER_OPTIONS` for visibility to - # other sub-commands like `build_ext`. - for k, v in USER_OPTIONS.items(): - arg = k.replace("-", "_") - if hasattr(self, arg): - USER_OPTIONS[k] = (v[0], v[1], getattr(self, arg)) - super().run() - - -if __name__ == "__main__": - # Supported commands: - # From internet: - # - pip install xgboost - # - pip install --no-binary :all: xgboost - - # From source tree `xgboost/python-package`: - # - python setup.py build - # - python setup.py build_ext - # - python setup.py install - # - python setup.py sdist && pip install - # - python setup.py bdist_wheel && pip install - - # When XGBoost is compiled directly with CMake: - # - pip install -e . 
- # - python setup.py develop # same as above - logging.basicConfig(level=logging.INFO) - - with open(os.path.join(CURRENT_DIR, "README.rst"), encoding="utf-8") as fd: - description = fd.read() - with open(os.path.join(CURRENT_DIR, "xgboost/VERSION"), encoding="ascii") as fd: - version = fd.read().strip() - - setup( - name="xgboost", - version=version, - description="XGBoost Python Package", - long_description=description, - long_description_content_type="text/x-rst", - install_requires=[ - "numpy", - "scipy", - ], - ext_modules=[CMakeExtension("libxgboost")], - # error: expected "str": "Type[Command]" - cmdclass={ - "build_ext": BuildExt, # type: ignore - "sdist": Sdist, # type: ignore - "install_lib": InstallLib, # type: ignore - "install": Install, # type: ignore - }, - extras_require={ - "pandas": ["pandas"], - "scikit-learn": ["scikit-learn"], - "dask": ["dask", "pandas", "distributed"], - "datatable": ["datatable"], - "plotting": ["graphviz", "matplotlib"], - "pyspark": ["pyspark", "scikit-learn", "cloudpickle"], - }, - maintainer="Hyunsu Cho", - maintainer_email="chohyu01@cs.washington.edu", - zip_safe=False, - packages=find_packages(), - include_package_data=True, - license="Apache-2.0", - classifiers=[ - "License :: OSI Approved :: Apache Software License", - "Development Status :: 5 - Production/Stable", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - ], - python_requires=">=3.8", - url="https://github.com/dmlc/xgboost", - ) - - clean_up() diff --git a/python-package/xgboost/config.py b/python-package/xgboost/config.py index c08a13150508..1691d473fa70 100644 --- a/python-package/xgboost/config.py +++ b/python-package/xgboost/config.py @@ -16,7 +16,7 @@ def config_doc( extra_note: Optional[str] = None, parameters: Optional[str] = None, returns: Optional[str] 
= None, - see_also: Optional[str] = None + see_also: Optional[str] = None, ) -> Callable[[_F], _F]: """Decorator to format docstring for config functions. diff --git a/python-package/xgboost/plotting.py b/python-package/xgboost/plotting.py index 71058e8c952d..d9eb14d0f600 100644 --- a/python-package/xgboost/plotting.py +++ b/python-package/xgboost/plotting.py @@ -30,7 +30,7 @@ def plot_importance( grid: bool = True, show_values: bool = True, values_format: str = "{v}", - **kwargs: Any + **kwargs: Any, ) -> Axes: """Plot importance based on fitted trees. @@ -155,7 +155,7 @@ def to_graphviz( no_color: Optional[str] = None, condition_node_params: Optional[dict] = None, leaf_node_params: Optional[dict] = None, - **kwargs: Any + **kwargs: Any, ) -> GraphvizSource: """Convert specified tree to graphviz instance. IPython can automatically plot the returned graphviz instance. Otherwise, you should call .render() method @@ -250,7 +250,7 @@ def plot_tree( num_trees: int = 0, rankdir: Optional[str] = None, ax: Optional[Axes] = None, - **kwargs: Any + **kwargs: Any, ) -> Axes: """Plot specified tree. diff --git a/tests/buildkite/build-cpu-arm64.sh b/tests/buildkite/build-cpu-arm64.sh index 1a95a880a515..fd00a7971101 100755 --- a/tests/buildkite/build-cpu-arm64.sh +++ b/tests/buildkite/build-cpu-arm64.sh @@ -18,7 +18,7 @@ $command_wrapper bash -c "cd build && ctest --extra-verbose" echo "--- Build binary wheel" $command_wrapper bash -c \ - "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal" + "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . 
--wheel-dir dist/" $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \ ${BUILDKITE_COMMIT} ${WHEEL_TAG} diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh index b25345b1bbb1..c180695e820f 100755 --- a/tests/buildkite/build-cuda.sh +++ b/tests/buildkite/build-cuda.sh @@ -27,7 +27,7 @@ $command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc -DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag} echo "--- Build binary wheel" $command_wrapper bash -c \ - "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal" + "cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/" $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \ ${BUILDKITE_COMMIT} ${WHEEL_TAG} diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1 index 05d7aefb9048..32cd2806adcd 100644 --- a/tests/buildkite/build-win64-gpu.ps1 +++ b/tests/buildkite/build-win64-gpu.ps1 @@ -24,21 +24,17 @@ if ($LASTEXITCODE -ne 0) { throw "Last command failed" } Write-Host "--- Build binary wheel" cd ../python-package conda activate -& python setup.py bdist_wheel --universal +& pip install --user -v "pip>=23" +& pip --version +& pip wheel --no-deps -v . --wheel-dir dist/ Get-ChildItem . -Filter dist/*.whl | Foreach-Object { & python ../tests/ci_build/rename_whl.py $_.FullName $Env:BUILDKITE_COMMIT win_amd64 if ($LASTEXITCODE -ne 0) { throw "Last command failed" } } -Write-Host "--- Insert vcomp140.dll (OpenMP runtime) into the wheel" -cd dist -Copy-Item -Path ../../tests/ci_build/insert_vcomp140.py -Destination . -& python insert_vcomp140.py *.whl -if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - Write-Host "--- Upload Python wheel" -cd ../.. +cd .. Get-ChildItem . 
-Filter python-package/dist/*.whl | Foreach-Object { & buildkite-agent artifact upload python-package/dist/$_ diff --git a/tests/ci_build/build_python_wheels.sh b/tests/ci_build/build_python_wheels.sh index d91df2286fd2..205b3b695fcd 100644 --- a/tests/ci_build/build_python_wheels.sh +++ b/tests/ci_build/build_python_wheels.sh @@ -26,7 +26,7 @@ if [[ "$platform_id" == macosx_* ]]; then # cibuildwheel will take care of cross-compilation. wheel_tag=macosx_12_0_arm64 cpython_ver=38 - setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to setup.py + setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to xgboost.packager backend export PYTHON_CROSSENV=1 export MACOSX_DEPLOYMENT_TARGET=12.0 #OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2" diff --git a/tests/ci_build/change_version.py b/tests/ci_build/change_version.py index 62cb894dcb75..25561859c5e4 100644 --- a/tests/ci_build/change_version.py +++ b/tests/ci_build/change_version.py @@ -40,14 +40,24 @@ def pypkg( major: int, minor: int, patch: int, rc: int, is_rc: bool, is_dev: bool ) -> None: version = f"{major}.{minor}.{patch}" - pyver_path = os.path.join("xgboost", "VERSION") pyver = version if is_rc: pyver = pyver + f"rc{rc}" if is_dev: pyver = pyver + "-dev" + + pyver_path = os.path.join("xgboost", "VERSION") with open(pyver_path, "w") as fd: - fd.write(pyver) + fd.write(pyver + "\n") + + pyprj_path = os.path.join("pyproject.toml") + with open(pyprj_path, "r") as fd: + pyprj = fd.read() + matched = re.search('version = "' + r"([0-9]+\.[0-9]+\.[0-9]+.*)" + '"', pyprj) + assert matched, "Couldn't find version string in pyproject.toml." 
+ pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :] + with open(pyprj_path, "w") as fd: + fd.write(pyprj) @cd(R_PACKAGE) diff --git a/tests/ci_build/conda_env/python_lint.yml b/tests/ci_build/conda_env/python_lint.yml index a64f649a21a4..3d42dfaf3ae6 100644 --- a/tests/ci_build/conda_env/python_lint.yml +++ b/tests/ci_build/conda_env/python_lint.yml @@ -18,6 +18,7 @@ dependencies: - cloudpickle - pytest - hypothesis +- hatchling - pip: # TODO: Replace it with pyspark>=3.4 once 3.4 released. - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz diff --git a/tests/ci_build/conda_env/sdist_test.yml b/tests/ci_build/conda_env/sdist_test.yml index acc4607ad722..67a9324f7006 100644 --- a/tests/ci_build/conda_env/sdist_test.yml +++ b/tests/ci_build/conda_env/sdist_test.yml @@ -8,5 +8,6 @@ dependencies: - wheel - cmake - ninja +- python-build - c-compiler - cxx-compiler diff --git a/tests/ci_build/insert_vcomp140.py b/tests/ci_build/insert_vcomp140.py deleted file mode 100644 index cfa8d792dee2..000000000000 --- a/tests/ci_build/insert_vcomp140.py +++ /dev/null @@ -1,102 +0,0 @@ -import argparse -import base64 -import glob -import hashlib -import os -import pathlib -import re -import shutil -import tempfile - -VCOMP140_PATH = "C:\\Windows\\System32\\vcomp140.dll" - - -def get_sha256sum(path): - return ( - base64.urlsafe_b64encode(hashlib.sha256(open(path, "rb").read()).digest()) - .decode("latin1") - .rstrip("=") - ) - - -def update_record(*, wheel_content_dir, xgboost_version): - vcomp140_size = os.path.getsize(VCOMP140_PATH) - vcomp140_hash = get_sha256sum(VCOMP140_PATH) - - record_path = wheel_content_dir / pathlib.Path( - f"xgboost-{xgboost_version}.dist-info/RECORD" - ) - with open(record_path, "r") as f: - record_content = f.read() - record_content += f"xgboost-{xgboost_version}.data/data/xgboost/vcomp140.dll," - record_content += f"sha256={vcomp140_hash},{vcomp140_size}\n" - with open(record_path, "w") as f: - 
f.write(record_content) - - -def main(args): - candidates = list(sorted(glob.glob(args.wheel_path))) - for wheel_path in candidates: - print(f"Processing wheel {wheel_path}") - m = re.search(r"xgboost-(.*)\+.*-py3", wheel_path) - if not m: - raise ValueError(f"Wheel {wheel_path} has unexpected name") - version = m.group(1) - print(f" Detected version for {wheel_path}: {version}") - print(f" Inserting vcomp140.dll into {wheel_path}...") - with tempfile.TemporaryDirectory() as tempdir: - wheel_content_dir = pathlib.Path(tempdir) / "wheel_content" - print(f" Extract {wheel_path} into {wheel_content_dir}") - shutil.unpack_archive( - wheel_path, extract_dir=wheel_content_dir, format="zip" - ) - data_dir = wheel_content_dir / pathlib.Path( - f"xgboost-{version}.data/data/xgboost" - ) - data_dir.mkdir(parents=True, exist_ok=True) - - print(f" Copy {VCOMP140_PATH} -> {data_dir}") - shutil.copy(VCOMP140_PATH, data_dir) - - print(f" Update RECORD") - update_record(wheel_content_dir=wheel_content_dir, xgboost_version=version) - - print(f" Content of {wheel_content_dir}:") - for e in sorted(wheel_content_dir.rglob("*")): - if e.is_file(): - r = e.relative_to(wheel_content_dir) - print(f" {r}") - - print(f" Create new wheel...") - new_wheel_tmp_path = pathlib.Path(tempdir) / "new_wheel" - shutil.make_archive( - str(new_wheel_tmp_path.resolve()), - format="zip", - root_dir=wheel_content_dir, - ) - new_wheel_tmp_path = new_wheel_tmp_path.resolve().with_suffix(".zip") - new_wheel_tmp_path = new_wheel_tmp_path.rename( - new_wheel_tmp_path.with_suffix(".whl") - ) - print(f" Created new wheel {new_wheel_tmp_path}") - - # Rename the old wheel with suffix .bak - # The new wheel takes the name of the old wheel - wheel_path_obj = pathlib.Path(wheel_path).resolve() - backup_path = wheel_path_obj.with_suffix(".whl.bak") - print(f" Rename {wheel_path_obj} -> {backup_path}") - wheel_path_obj.replace(backup_path) - print(f" Rename {new_wheel_tmp_path} -> {wheel_path_obj}") - 
new_wheel_tmp_path.replace(wheel_path_obj) - - shutil.rmtree(wheel_content_dir) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "wheel_path", type=str, help="Path to wheel (wildcard permitted)" - ) - args = parser.parse_args() - - main(args) diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 00791e19db8c..3f553da9f79e 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -198,7 +198,7 @@ def main(args: argparse.Namespace) -> None: run_mypy(path) for path in [ # core - "python-package/xgboost/", + "python-package/", # demo "demo/json-model/json_parser.py", "demo/guide-python/external_memory.py", diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh index 7375b4c9f872..a70b2796130f 100755 --- a/tests/ci_build/test_python.sh +++ b/tests/ci_build/test_python.sh @@ -28,7 +28,7 @@ function install_xgboost { then pushd . cd python-package - python setup.py install --user + pip install --user -v . popd fi }