Skip to content

Commit

Permalink
Move protobuf compilation to CMake (#11986)
Browse files Browse the repository at this point in the history
We currently compile a proto file into a Python file inside setup.py by overriding a setuptools (scikit-build) command stage (`build_ext`). However, depending on exactly how the package is built (specifically, when building wheels), that stage may occasionally be bypassed. Moving this logic into CMake guarantees that it always runs.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Paul Taylor (https://github.com/trxcllnt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #11986
  • Loading branch information
vyasr authored Oct 25, 2022
1 parent 6a5c77b commit 5bfc9a4
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 48 deletions.
4 changes: 4 additions & 0 deletions python/cudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,7 @@ include(rapids-cython)
rapids_cython_init()

add_subdirectory(cudf/_lib)

include(cmake/Modules/ProtobufHelpers.cmake)

codegen_protoc(cudf/utils/metadata/orc_column_statistics.proto)
51 changes: 51 additions & 0 deletions python/cudf/cmake/Modules/ProtobufHelpers.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================
include_guard(GLOBAL)

# Compile protobuf files to Python modules at configure time.
#
# Usage: codegen_protoc(<dir/a.proto> [<dir/b.proto> ...])
#
# Every argument must be a path ending in ".proto", given relative to CMAKE_CURRENT_SOURCE_DIR. For
# each one, protoc is run from CMAKE_CURRENT_SOURCE_DIR with --python_out=. and the resulting
# "<name>_pb2.py" file is rewritten so that it is wrapped in "flake8: noqa" / "fmt: off" /
# "fmt: on" marker comments.
#
# The protoc executable is taken from the PROTOC environment variable when that is set and
# non-empty; otherwise it is located with find_program. A failing protoc invocation aborts the
# configure step.
function(codegen_protoc)
  # Allow user to provide path to protoc executable as an environment variable. An empty value is
  # treated as "not provided" so that we never try to execute an empty command.
  if(DEFINED ENV{PROTOC} AND NOT "$ENV{PROTOC}" STREQUAL "")
    set(protoc_COMMAND "$ENV{PROTOC}")
  else()
    find_program(protoc_COMMAND protoc REQUIRED)
  endif()

  foreach(_proto_path IN LISTS ARGV)
    # Reject anything that is not a .proto file: without the suffix the computed output path would
    # equal the input path, and the rewrite below would clobber the input file.
    if(NOT "${_proto_path}" MATCHES "\\.proto$")
      message(
        FATAL_ERROR "codegen_protoc: expected a path ending in .proto, got '${_proto_path}'"
      )
    endif()

    # Only the trailing extension is rewritten; ".proto" appearing elsewhere in the path is kept.
    string(REGEX REPLACE "\\.proto$" "_pb2.py" pb2_py_path "${_proto_path}")
    set(pb2_py_path "${CMAKE_CURRENT_SOURCE_DIR}/${pb2_py_path}")

    # Re-run the configure step (and therefore this code generation) whenever the .proto changes.
    set_property(
      DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
                                "${CMAKE_CURRENT_SOURCE_DIR}/${_proto_path}"
    )

    # Note: If we ever need to process larger numbers of protobuf files we should consider switching
    # to protobuf_generate_python from the FindProtobuf module.
    execute_process(
      COMMAND "${protoc_COMMAND}" --python_out=. "${_proto_path}"
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMMAND_ERROR_IS_FATAL ANY
    )

    # Mark entire file to skip linting and formatting. protoc regenerates the file from scratch on
    # every run, so the markers are never duplicated.
    file(READ "${pb2_py_path}" pb2_py)
    file(WRITE "${pb2_py_path}" "# flake8: noqa\n# fmt: off\n${pb2_py}# fmt: on\n")
  endforeach()
endfunction()
2 changes: 2 additions & 0 deletions python/cudf/cudf/utils/metadata/orc_column_statistics.proto
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
syntax = "proto2";

message IntegerStatistics {
optional sint64 minimum = 1;
optional sint64 maximum = 2;
Expand Down
49 changes: 1 addition & 48 deletions python/cudf/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,10 @@
import os
import re
import shutil
import subprocess
import sys
from distutils.spawn import find_executable

import versioneer
from setuptools import find_packages
from skbuild import setup
from skbuild.command.build_ext import build_ext

install_requires = [
"cachetools",
Expand Down Expand Up @@ -84,49 +80,6 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
)


# build_ext command that generates the protobuf Python module with protoc
# and then delegates to the parent build_ext implementation.
class build_ext_and_proto(build_ext):
# Generate the *_pb2.py module(s), then run the regular build_ext step.
def run(self):
# Get protoc
# Prefer the PROTOC environment variable when it names an existing path;
# otherwise fall back to looking up "protoc" with find_executable.
protoc = None
if "PROTOC" in os.environ and os.path.exists(os.environ["PROTOC"]):
protoc = os.environ["PROTOC"]
else:
protoc = find_executable("protoc")
# No protoc available: report on stderr and exit with status 1.
if protoc is None:
sys.stderr.write("protoc not found")
sys.exit(1)

# Build .proto file
for source in ["cudf/utils/metadata/orc_column_statistics.proto"]:
# Output path is the source path with ".proto" replaced by "_pb2.py".
output = source.replace(".proto", "_pb2.py")

# Regenerate when the output is missing or older than the .proto source.
if not os.path.exists(output) or (
os.path.getmtime(source) > os.path.getmtime(output)
):
# Append the lint/format markers to the output file before invoking protoc.
with open(output, "a") as src:
src.write("# flake8: noqa" + os.linesep)
src.write("# fmt: off" + os.linesep)
# Run protoc with --python_out=. on the source file; check_call raises
# if protoc exits with a non-zero status.
subprocess.check_call([protoc, "--python_out=.", source])
# Rewrite the file in place: marker header, then the current file
# contents, then a closing "# fmt: on" line.
with open(output, "r+") as src:
new_src_content = (
"# flake8: noqa"
+ os.linesep
+ "# fmt: off"
+ os.linesep
+ src.read()
+ "# fmt: on"
+ os.linesep
)
# Rewind so the wrapped content is written from the start of the file.
src.seek(0)
src.write(new_src_content)

# Run original Cython build_ext command
super().run()


cmdclass = versioneer.get_cmdclass()
cmdclass["build_ext"] = build_ext_and_proto

setup(
name="cudf",
version=versioneer.get_version(),
Expand All @@ -147,7 +100,7 @@ def run(self):
package_data={
key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"])
},
cmdclass=cmdclass,
cmdclass=versioneer.get_cmdclass(),
install_requires=install_requires,
extras_require=extras_require,
zip_safe=False,
Expand Down

0 comments on commit 5bfc9a4

Please sign in to comment.