From 7b6e4ba5283d3f3e2445ad856b33d8f3d49cf2b7 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Tue, 24 Jan 2023 10:53:31 -0800 Subject: [PATCH 01/32] Implement mdspan serializer --- cpp/CMakeLists.txt | 11 +- .../core/detail/mdspan_numpy_serializer.hpp | 230 ++++++++++++++++++ cpp/include/raft/core/mdspan.hpp | 5 +- cpp/include/raft/core/mdspan_serializer.hpp | 62 +++++ cpp/test/CMakeLists.txt | 1 + cpp/test/core/mdspan_serializer.cu | 115 +++++++++ cpp/test/core/mdspan_utils.cu | 2 +- 7 files changed, 423 insertions(+), 3 deletions(-) create mode 100644 cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp create mode 100644 cpp/include/raft/core/mdspan_serializer.hpp create mode 100644 cpp/test/core/mdspan_serializer.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 784bbbb935..855c2582cb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -218,6 +218,15 @@ target_link_libraries( target_compile_features(raft INTERFACE cxx_std_17 $) +# Endian detection +include(TestBigEndian) +test_big_endian(BIG_ENDIAN) +if(BIG_ENDIAN) + target_compile_definitions(raft INTERFACE RAFT_SYSTEM_LITTLE_ENDIAN=0) +else() + target_compile_definitions(raft INTERFACE RAFT_SYSTEM_LITTLE_ENDIAN=1) +endif() + if(RAFT_COMPILE_DIST_LIBRARY OR RAFT_COMPILE_NN_LIBRARY) file( WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp new file mode 100644 index 0000000000..643cf9036d --- /dev/null +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { + +namespace detail { + +namespace numpy_serializer { + +/* + * A small implementation of NumPy serialization format. + * Reference: https://numpy.org/doc/1.13/neps/npy-format.html + * + * Adapted from https://github.com/llohse/libnpy/blob/master/include/npy.hpp, using the following + * license: + * + * MIT License + * + * Copyright (c) 2021 Leon Merten Lohse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +const char little_endian_char = '<'; +const char big_endian_char = '>'; +const char no_endian_char = '|'; +const char endian_chars[] = {little_endian_char, big_endian_char, no_endian_char}; +const char numtype_chars[] = {'f', 'i', 'u', 'c'}; + +#if RAFT_SYSTEM_LITTLE_ENDIAN == 1 +const char host_endian_char = little_endian_char; +#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 +const char host_endian_char = big_endian_char; +#endif // RAFT_SYSTEM_LITTLE_ENDIAN == 1 + +using ndarray_len_t = std::uint64_t; + +struct dtype_t { + const char byteorder; + const char kind; + const unsigned int itemsize; + + std::string to_string() const + { + char buf[16] = {0}; + std::sprintf(buf, "%c%c%u", byteorder, kind, itemsize); + return std::string(buf); + } + + bool operator==(const dtype_t& other) const + { + return (byteorder == other.byteorder && kind == other.kind && itemsize == other.itemsize); + } +}; + +struct header_t { + const dtype_t dtype; + const bool fortran_order; + const std::vector shape; +}; + +template +struct is_complex : std::false_type { +}; +template +struct is_complex> : std::true_type { +}; + +template , bool> = true> +dtype_t get_numpy_dtype() +{ + return {host_endian_char, 'f', sizeof(T)}; +} + +template && std::is_signed_v, bool> = true> +dtype_t get_numpy_dtype() +{ + const char endian_char = (sizeof(T) == 1 ? no_endian_char : host_endian_char); + return {endian_char, 'i', sizeof(T)}; +} + +template && std::is_unsigned_v, bool> = true> +dtype_t get_numpy_dtype() +{ + const char endian_char = (sizeof(T) == 1 ? no_endian_char : host_endian_char); + return {endian_char, 'u', sizeof(T)}; +} + +template {}, bool> = true> +dtype_t get_numpy_dtype() +{ + return {host_endian_char, 'c', sizeof(T)}; +} + +template +std::string tuple_to_string(const std::vector& tuple) +{ + std::ostringstream oss; + if (tuple.empty()) { + oss << "()"; + } else if (tuple.size() == 1) { + oss << "(" << tuple.front() << ",)"; + } else { + oss << "("; + for (std::size_t i = 0; i < tuple.size() - 1; ++i) { + oss << tuple[i] << ", "; + } + oss << tuple.back() << ")"; + } + return oss.str(); +} + +std::string header_to_string(const header_t& header) +{ + std::ostringstream oss; + oss << "{'descr': '" << header.dtype.to_string() + << "', 'fortran_order': " << (header.fortran_order ? "True" : "False") + << ", 'shape': " << tuple_to_string(header.shape) << "}"; + return oss.str(); +} + +const char magic_string[] = "\x93NUMPY"; +const std::size_t magic_string_length = 6; + +void write_magic(std::ostream& os) +{ + os.write(magic_string, magic_string_length); + RAFT_EXPECTS(os.good(), "Error writing magic string"); + // Use version 1.0 + os.put(1); + os.put(0); + RAFT_EXPECTS(os.good(), "Error writing magic string"); +} + +void write_header(std::ostream& os, const header_t& header) +{ + std::string header_dict = header_to_string(header); + std::size_t preamble_length = magic_string_length + 2 + 2 + header_dict.length() + 1; + RAFT_EXPECTS(preamble_length < 255 * 255, "Header too long"); + std::size_t padding_len = 16 - preamble_length % 16; + std::string padding(padding_len, ' '); + + write_magic(os); + + // Write header length + std::uint8_t header_len_le16[2]; + std::uint16_t header_len = + static_cast(header_dict.length() + padding.length() + 1); + header_len_le16[0] = (header_len >> 0) & 0xff; + header_len_le16[1] = (header_len >> 8) & 0xff; + os.put(header_len_le16[0]); + os.put(header_len_le16[1]); + RAFT_EXPECTS(os.good(), "Error writing HEADER_LEN"); + + os << header_dict << padding << "\n"; + RAFT_EXPECTS(os.good(), "Error writing header dict"); +} + +template +void serialize(const raft::handle_t& handle, + std::ostream& os, + const raft::host_mdspan& obj) +{ + using obj_t = raft::host_mdspan; + using inner_accessor_type = typename obj_t::accessor_type::accessor_type; + static_assert( + std::is_same_v>, + "The serializer only supports serializing mdspans with default accessor"); + + const auto dtype = get_numpy_dtype(); + const bool fortran_order = std::is_same_v; + std::vector shape; + for (typename obj_t::rank_type i = 0; i < obj.rank(); ++i) { + shape.push_back(obj.extent(i)); + } + const header_t header = {dtype, fortran_order, shape}; + write_header(os, header); + + // For contiguous layouts, size() == product of dimensions + os.write(reinterpret_cast(obj.data_handle()), obj.size() * sizeof(ElementType)); + RAFT_EXPECTS(os.good(), "Error writing content of mdspan"); +} + +} // end namespace numpy_serializer +} // end namespace detail +} // end namespace raft diff --git a/cpp/include/raft/core/mdspan.hpp b/cpp/include/raft/core/mdspan.hpp index 786ce69f89..0b30e9a73a 100644 --- a/cpp/include/raft/core/mdspan.hpp +++ b/cpp/include/raft/core/mdspan.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,6 +33,9 @@ template > using mdspan = std::experimental::mdspan; +template +using dextents = std::experimental::dextents; + namespace detail { // keeping ByteAlignment as optional to allow testing diff --git a/cpp/include/raft/core/mdspan_serializer.hpp b/cpp/include/raft/core/mdspan_serializer.hpp new file mode 100644 index 0000000000..7339c02997 --- /dev/null +++ b/cpp/include/raft/core/mdspan_serializer.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace raft { + +template +void serialize_mdspan( + const raft::handle_t& handle, + std::ostream& os, + const raft::host_mdspan& obj) +{ + static_assert(std::is_same_v || + std::is_same_v, + "The serializer only supports row-major and column-major layouts"); + detail::numpy_serializer::serialize(handle, os, obj); +} + +template +void serialize_mdspan( + const raft::handle_t& handle, + std::ostream& os, + const raft::device_mdspan& obj) +{ + static_assert(std::is_same_v || + std::is_same_v, + "The serializer only supports row-major and column-major layouts"); + using obj_t = raft::device_mdspan; + + // Copy to host before serializing + // For contiguous layouts, size() == product of dimensions + std::vector tmp(obj.size()); + cudaStream_t stream = handle.get_stream(); + raft::update_host(tmp.data(), obj.data_handle(), obj.size(), stream); + using inner_accessor_type = typename obj_t::accessor_type::accessor_type; + auto tmp_mdspan = + raft::host_mdspan>( + tmp, obj.extents()); + detail::numpy_serializer::serialize(handle, os, tmp_mdspan); +} + +} // end namespace raft diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 8ca30a5c82..5908ca4128 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -93,6 +93,7 @@ if(BUILD_TESTS) test/core/nvtx.cpp test/core/mdarray.cu test/core/mdspan_utils.cu + test/core/mdspan_serializer.cu test/core/memory_type.cpp test/core/span.cpp test/core/span.cu diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu new file mode 100644 index 0000000000..3861cc8948 --- /dev/null +++ b/cpp/test/core/mdspan_serializer.cu @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { + +TEST(MDArraySerializer, Basic) +{ + raft::handle_t handle{}; + + std::vector vec{1.0, 2.0, 3.0, 4.0}; + + using mdspan_matrix2d_c_layout = + raft::host_mdspan, raft::layout_c_contiguous>; + auto span = mdspan_matrix2d_c_layout(vec.data(), 2, 2); + + std::ofstream of("/home/phcho/tmp/foobar.npy", std::ios::out | std::ios::binary); + serialize_mdspan(handle, of, span); +} + +TEST(MDArraySerializer, Tuple2String) +{ + { + std::vector tuple{}; + EXPECT_EQ(detail::numpy_serializer::tuple_to_string(tuple), "()"); + } + { + std::vector tuple{2}; + EXPECT_EQ(detail::numpy_serializer::tuple_to_string(tuple), "(2,)"); + } + { + std::vector tuple{2, 3}; + EXPECT_EQ(detail::numpy_serializer::tuple_to_string(tuple), "(2, 3)"); + } + { + std::vector tuple{2, 3, 10, 20}; + EXPECT_EQ(detail::numpy_serializer::tuple_to_string(tuple), "(2, 3, 10, 20)"); + } +} + +TEST(MDArraySerializer, NumPyDType) +{ + const char expected_endian_char = RAFT_SYSTEM_LITTLE_ENDIAN ? '<' : '>'; + { + const detail::numpy_serializer::dtype_t expected_dtype{ + expected_endian_char, 'f', sizeof(float)}; + EXPECT_EQ(detail::numpy_serializer::get_numpy_dtype(), expected_dtype); + } + { + const detail::numpy_serializer::dtype_t expected_dtype{ + expected_endian_char, 'f', sizeof(long double)}; + EXPECT_EQ(detail::numpy_serializer::get_numpy_dtype(), expected_dtype); + } + { + const detail::numpy_serializer::dtype_t expected_dtype{'|', 'i', sizeof(signed char)}; + EXPECT_EQ(detail::numpy_serializer::get_numpy_dtype(), expected_dtype); + } + { + const detail::numpy_serializer::dtype_t expected_dtype{ + expected_endian_char, 'i', sizeof(std::int64_t)}; + EXPECT_EQ(detail::numpy_serializer::get_numpy_dtype(), expected_dtype); + } + { + const detail::numpy_serializer::dtype_t expected_dtype{'|', 'u', sizeof(unsigned char)}; + EXPECT_EQ(detail::numpy_serializer::get_numpy_dtype(), expected_dtype); + } + { + const detail::numpy_serializer::dtype_t expected_dtype{ + expected_endian_char, 'u', sizeof(std::uint64_t)}; + EXPECT_EQ(detail::numpy_serializer::get_numpy_dtype(), expected_dtype); + } + { + const detail::numpy_serializer::dtype_t expected_dtype{ + expected_endian_char, 'c', sizeof(std::complex)}; + EXPECT_EQ(detail::numpy_serializer::get_numpy_dtype>(), expected_dtype); + } +} + +TEST(MDArraySerializer, WriteHeader) +{ + using namespace std::string_literals; + std::ostringstream oss; + detail::numpy_serializer::header_t header{{'<', 'f', 8}, false, {2, 10, 5}}; + detail::numpy_serializer::write_header(oss, header); + EXPECT_EQ(oss.str(), + "\x93NUMPY\x01\x00"s // magic string + version (1.0) + "\x46\x00"s // HEADER_LEN = 70, in little endian + "{'descr': ' Date: Tue, 24 Jan 2023 14:25:14 -0800 Subject: [PATCH 02/32] Implement deserializer --- .../core/detail/mdspan_numpy_serializer.hpp | 210 +++++++++++++++++- cpp/include/raft/core/mdspan_serializer.hpp | 36 +++ cpp/test/core/mdspan_serializer.cu | 95 +++++++- 3 files changed, 334 insertions(+), 7 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 643cf9036d..2070e422b8 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -19,10 +19,12 @@ #include #include #include -#include +#include +#include #include #include #include +#include #include #include #include @@ -99,6 +101,11 @@ struct header_t { const dtype_t dtype; const bool fortran_order; const std::vector shape; + + bool operator==(const header_t& other) const + { + return (dtype == other.dtype && fortran_order == other.fortran_order && shape == other.shape); + } }; template @@ -163,6 +170,112 @@ std::string header_to_string(const header_t& header) return oss.str(); } +std::string trim(const std::string& str) +{ + const std::string whitespace = " \t"; + auto begin = str.find_first_not_of(whitespace); + if (begin == std::string::npos) { return ""; } + auto end = str.find_last_not_of(whitespace); + + return str.substr(begin, end - begin + 1); +} + +// A poor man's parser for Python dictionary +// TODO(hcho3): Consider writing a proper parser +// Limitation: can only parse a flat dictionary; all values are assumed to non-objects +// Limitation: must know all the keys ahead of time; you get undefined behavior if you omit any key +std::map parse_pydict(std::string str, + const std::vector& keys) +{ + std::map result; + + // Unwrap dictionary + str = trim(str); + RAFT_EXPECTS(str.front() == '{' && str.back() == '}', "Expected a Python dictionary"); + str = str.substr(1, str.length() - 2); + + // Get the position of each key and put it in the list + std::vector> positions; + for (auto const& key : keys) { + std::size_t pos = str.find("'" + key + "'"); + RAFT_EXPECTS(pos != std::string::npos, "Missing '%s' key.", key.c_str()); + positions.emplace_back(pos, key); + } + // Sort the list + std::sort(positions.begin(), positions.end()); + + // Extract each key-value pair + for (std::size_t i = 0; i < positions.size(); ++i) { + std::string key = positions[i].second; + + std::size_t begin = positions[i].first; + std::size_t end = (i + 1 < positions.size() ? positions[i + 1].first : std::string::npos); + std::string raw_value = trim(str.substr(begin, end - begin)); + if (raw_value.back() == ',') { raw_value.pop_back(); } + std::size_t sep_pos = raw_value.find_first_of(":"); + if (sep_pos == std::string::npos) { + result[key] = ""; + } else { + result[key] = trim(raw_value.substr(sep_pos + 1)); + } + } + + return result; +} + +std::string parse_pystring(std::string str) +{ + RAFT_EXPECTS(str.front() == '\'' && str.back() == '\'', "Invalid Python string: %s", str.c_str()); + return str.substr(1, str.length() - 2); +} + +bool parse_pybool(std::string str) +{ + if (str == "True") { + return true; + } else if (str == "False") { + return false; + } else { + RAFT_FAIL("Invalid Python boolean: %s", str.c_str()); + } +} + +std::vector parse_pytuple(std::string str) +{ + std::vector result; + + str = trim(str); + RAFT_EXPECTS(str.front() == '(' && str.back() == ')', "Invalid Python tuple: %s", str.c_str()); + str = str.substr(1, str.length() - 2); + + std::istringstream iss(str); + for (std::string token; std::getline(iss, token, ',');) { + result.push_back(trim(token)); + } + + return result; +} + +dtype_t parse_descr(std::string typestr) +{ + RAFT_EXPECTS(typestr.length() >= 3, "Invalid typestr: Too short"); + char byteorder_c = typestr.at(0); + char kind_c = typestr.at(1); + std::string itemsize_s = typestr.substr(2); + + RAFT_EXPECTS(std::find(std::begin(endian_chars), std::end(endian_chars), byteorder_c) != + std::end(endian_chars), + "Invalid typestr: unrecognized byteorder %c", + byteorder_c); + RAFT_EXPECTS(std::find(std::begin(numtype_chars), std::end(numtype_chars), kind_c) != + std::end(numtype_chars), + "Invalid typestr: unrecognized kind %c", + kind_c); + unsigned int itemsize = std::stoul(itemsize_s); + + return {byteorder_c, kind_c, itemsize}; +} + const char magic_string[] = "\x93NUMPY"; const std::size_t magic_string_length = 6; @@ -176,6 +289,23 @@ void write_magic(std::ostream& os) RAFT_EXPECTS(os.good(), "Error writing magic string"); } +void read_magic(std::istream& is) +{ + char magic_buf[magic_string_length + 2] = {0}; + is.read(magic_buf, magic_string_length + 2); + RAFT_EXPECTS(is.good(), "Error reading magic string"); + + RAFT_EXPECTS(std::memcmp(magic_buf, magic_string, magic_string_length) == 0, + "The given stream does not have a valid NumPy format."); + + std::uint8_t version_major = magic_buf[magic_string_length]; + std::uint8_t version_minor = magic_buf[magic_string_length + 1]; + RAFT_EXPECTS(version_major == 1 && version_minor == 0, + "Unsupported NumPy version: %d.%d", + version_major, + version_minor); +} + void write_header(std::ostream& os, const header_t& header) { std::string header_dict = header_to_string(header); @@ -200,6 +330,44 @@ void write_header(std::ostream& os, const header_t& header) RAFT_EXPECTS(os.good(), "Error writing header dict"); } +std::string read_header_bytes(std::istream& is) +{ + read_magic(is); + + // Read header length + std::uint8_t header_len_le16[2]; + is.read(reinterpret_cast(header_len_le16), 2); + RAFT_EXPECTS(is.good(), "Error while reading HEADER_LEN"); + const std::uint32_t header_length = (header_len_le16[0] << 0) | (header_len_le16[1] << 8); + + std::vector header_bytes(header_length); + is.read(header_bytes.data(), header_length); + RAFT_EXPECTS(is.good(), "Error while reading the header"); + + return std::string(header_bytes.data(), header_length); +} + +header_t read_header(std::istream& is) +{ + std::string header_bytes = read_header_bytes(is); + + // remove trailing newline + RAFT_EXPECTS(header_bytes.back() == '\n', "Invalid NumPy header"); + header_bytes.pop_back(); + + // parse the header dict + auto header_dict = parse_pydict(header_bytes, {"descr", "fortran_order", "shape"}); + dtype_t descr = parse_descr(parse_pystring(header_dict["descr"])); + bool fortran_order = parse_pybool(header_dict["fortran_order"]); + std::vector shape; + auto shape_tup_str = parse_pytuple(header_dict["shape"]); + for (const auto& e : shape_tup_str) { + shape.push_back(static_cast(std::stoul(e))); + } + + return {descr, fortran_order, shape}; +} + template void serialize(const raft::handle_t& handle, std::ostream& os, @@ -225,6 +393,46 @@ void serialize(const raft::handle_t& handle, RAFT_EXPECTS(os.good(), "Error writing content of mdspan"); } +template +void deserialize(const raft::handle_t& handle, + std::istream& is, + const raft::host_mdspan& obj) +{ + using obj_t = raft::host_mdspan; + using inner_accessor_type = typename obj_t::accessor_type::accessor_type; + static_assert( + std::is_same_v>, + "The serializer only supports serializing mdspans with default accessor"); + + // Check if given dtype and fortran_order are compatible with the mdspan + const auto expected_dtype = get_numpy_dtype(); + const bool expected_fortran_order = std::is_same_v; + header_t header = read_header(is); + RAFT_EXPECTS(header.dtype == expected_dtype, + "Expected dtype %s but got %s instead", + header.dtype.to_string().c_str(), + expected_dtype.to_string().c_str()); + RAFT_EXPECTS(header.fortran_order == expected_fortran_order, + "Wrong matrix layout; expected %s but got a different layout", + (expected_fortran_order ? "Fortran layout" : "C layout")); + + // Check if dimensions are correct + RAFT_EXPECTS(obj.rank() == header.shape.size(), + "Incorrect rank: expected %zu but got %zu", + obj.rank(), + header.shape.size()); + for (typename obj_t::rank_type i = 0; i < obj.rank(); ++i) { + RAFT_EXPECTS(obj.extent(i) == header.shape[i], + "Incorrect dimension: expected %zu but got %zu", + obj.extent(i), + header.shape[i]); + } + + // For contiguous layouts, size() == product of dimensions + is.read(reinterpret_cast(obj.data_handle()), obj.size() * sizeof(ElementType)); + RAFT_EXPECTS(is.good(), "Error while reading mdspan content"); +} + } // end namespace numpy_serializer } // end namespace detail } // end namespace raft diff --git a/cpp/include/raft/core/mdspan_serializer.hpp b/cpp/include/raft/core/mdspan_serializer.hpp index 7339c02997..d5b115ba7c 100644 --- a/cpp/include/raft/core/mdspan_serializer.hpp +++ b/cpp/include/raft/core/mdspan_serializer.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -59,4 +60,39 @@ void serialize_mdspan( detail::numpy_serializer::serialize(handle, os, tmp_mdspan); } +template +void deserialize_mdspan(const raft::handle_t& handle, + std::istream& is, + raft::host_mdspan& obj) +{ + static_assert(std::is_same_v || + std::is_same_v, + "The serializer only supports row-major and column-major layouts"); + detail::numpy_serializer::deserialize(handle, is, obj); +} + +template +void deserialize_mdspan( + const raft::handle_t& handle, + std::istream& is, + raft::device_mdspan& obj) +{ + static_assert(std::is_same_v || + std::is_same_v, + "The serializer only supports row-major and column-major layouts"); + using obj_t = raft::device_mdspan; + + // Copy to device after serializing + // For contiguous layouts, size() == product of dimensions + std::vector tmp(obj.size()); + using inner_accessor_type = typename obj_t::accessor_type::accessor_type; + auto tmp_mdspan = + raft::host_mdspan>( + tmp, obj.extents()); + detail::numpy_serializer::deserialize(handle, is, tmp_mdspan); + + cudaStream_t stream = handle.get_stream(); + raft::update_device(obj.data_handle(), tmp.data(), obj.size(), stream); +} + } // end namespace raft diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index 3861cc8948..29c9da5fed 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -21,23 +21,72 @@ #include #include #include +#include #include #include namespace raft { -TEST(MDArraySerializer, Basic) +template +void test_mdspan_roundtrip(const raft::handle_t& handle, std::vector& vec, Args... dims) { - raft::handle_t handle{}; + std::vector vec2(vec.size()); + + auto span = MDSpanType(vec.data(), dims...); + std::ostringstream oss; + serialize_mdspan(handle, oss, span); - std::vector vec{1.0, 2.0, 3.0, 4.0}; + auto span2 = MDSpanType(vec2.data(), dims...); + std::istringstream iss(oss.str()); + deserialize_mdspan(handle, iss, span2); + EXPECT_EQ(vec, vec2); +} + +TEST(MDArraySerializer, E2ERoundTrip) +{ + raft::handle_t handle{}; + std::vector vec{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; using mdspan_matrix2d_c_layout = raft::host_mdspan, raft::layout_c_contiguous>; - auto span = mdspan_matrix2d_c_layout(vec.data(), 2, 2); + using mdspan_matrix2d_f_layout = + raft::host_mdspan, raft::layout_f_contiguous>; + using mdspan_matrix3d_c_layout = + raft::host_mdspan, raft::layout_c_contiguous>; + using mdspan_matrix3d_f_layout = + raft::host_mdspan, raft::layout_f_contiguous>; - std::ofstream of("/home/phcho/tmp/foobar.npy", std::ios::out | std::ios::binary); - serialize_mdspan(handle, of, span); + test_mdspan_roundtrip(handle, vec, 2, 4); + test_mdspan_roundtrip(handle, vec, 2, 4); + test_mdspan_roundtrip(handle, vec, 1, 8); + test_mdspan_roundtrip(handle, vec, 1, 8); + test_mdspan_roundtrip(handle, vec, 2, 2, 2); + test_mdspan_roundtrip(handle, vec, 2, 2, 2); + test_mdspan_roundtrip(handle, vec, 1, 2, 4); + test_mdspan_roundtrip(handle, vec, 1, 2, 4); +} + +TEST(MDArraySerializer, HeaderRoundTrip) +{ + for (char byteorder : detail::numpy_serializer::endian_chars) { + for (char kind : detail::numpy_serializer::numtype_chars) { + for (unsigned int itemsize : std::vector{1, 2, 4, 8, 16}) { + for (bool fortran_order : std::vector{true, false}) { + for (const auto& shape : + std::vector>{ + {10}, {2, 2}, {10, 30, 100}, {}}) { + detail::numpy_serializer::dtype_t dtype{byteorder, kind, itemsize}; + detail::numpy_serializer::header_t header{dtype, fortran_order, shape}; + std::ostringstream oss; + detail::numpy_serializer::write_header(oss, header); + std::istringstream iss(oss.str()); + auto header2 = detail::numpy_serializer::read_header(iss); + EXPECT_EQ(header, header2); + } + } + } + } + } } TEST(MDArraySerializer, Tuple2String) @@ -112,4 +161,38 @@ TEST(MDArraySerializer, WriteHeader) ); } +TEST(MDArraySerializer, ParsePyDict) +{ + std::string dict{"{'apple': 2, 'pie': 'is', 'delicious': True, 'piece of': 'cake'}"}; + auto parse = + detail::numpy_serializer::parse_pydict(dict, {"apple", "pie", "delicious", "piece of"}); + auto expected_parse = std::map{ + {"apple", "2"}, {"pie", "'is'"}, {"delicious", "True"}, {"piece of", "'cake'"}}; + EXPECT_EQ(parse, expected_parse); +} + +TEST(MDArraySerializer, ParsePyString) +{ + EXPECT_EQ(detail::numpy_serializer::parse_pystring("'foobar'"), "foobar"); +} + +TEST(MDArraySerializer, ParsePyTuple) +{ + { + std::string tuple_str{"(2,)"}; + std::vector expected_parse{"2"}; + EXPECT_EQ(detail::numpy_serializer::parse_pytuple(tuple_str), expected_parse); + } + { + std::string tuple_str{"(2, 3)"}; + std::vector expected_parse{"2", "3"}; + EXPECT_EQ(detail::numpy_serializer::parse_pytuple(tuple_str), expected_parse); + } + { + std::string tuple_str{"(2, 3, 10, 20)"}; + std::vector expected_parse{"2", "3", "10", "20"}; + EXPECT_EQ(detail::numpy_serializer::parse_pytuple(tuple_str), expected_parse); + } +} + } // namespace raft From 9d4b332600ab27c9976799b16de717bb1ffe8065 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Wed, 25 Jan 2023 15:28:55 -0800 Subject: [PATCH 03/32] Update ANN to use the new mdspan serializer --- .../core/detail/mdspan_numpy_serializer.hpp | 12 +-- cpp/include/raft/core/mdspan_serializer.hpp | 21 ++++- .../spatial/knn/detail/ann_serialization.h | 81 ------------------- .../spatial/knn/detail/ivf_flat_build.cuh | 25 +++--- .../raft/spatial/knn/detail/ivf_pq_build.cuh | 33 ++++---- 5 files changed, 51 insertions(+), 121 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 2070e422b8..4607d36944 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -373,11 +373,7 @@ void serialize(const raft::handle_t& handle, std::ostream& os, const raft::host_mdspan& obj) { - using obj_t = raft::host_mdspan; - using inner_accessor_type = typename obj_t::accessor_type::accessor_type; - static_assert( - std::is_same_v>, - "The serializer only supports serializing mdspans with default accessor"); + using obj_t = raft::host_mdspan; const auto dtype = get_numpy_dtype(); const bool fortran_order = std::is_same_v; @@ -398,11 +394,7 @@ void deserialize(const raft::handle_t& handle, std::istream& is, const raft::host_mdspan& obj) { - using obj_t = raft::host_mdspan; - using inner_accessor_type = typename obj_t::accessor_type::accessor_type; - static_assert( - std::is_same_v>, - "The serializer only supports serializing mdspans with default accessor"); + using obj_t = raft::host_mdspan; // Check if given dtype and fortran_order are compatible with the mdspan const auto expected_dtype = get_numpy_dtype(); diff --git a/cpp/include/raft/core/mdspan_serializer.hpp b/cpp/include/raft/core/mdspan_serializer.hpp index d5b115ba7c..9e685e76b4 100644 --- a/cpp/include/raft/core/mdspan_serializer.hpp +++ b/cpp/include/raft/core/mdspan_serializer.hpp @@ -56,7 +56,7 @@ void serialize_mdspan( using inner_accessor_type = typename obj_t::accessor_type::accessor_type; auto tmp_mdspan = raft::host_mdspan>( - tmp, obj.extents()); + tmp.data(), obj.extents()); detail::numpy_serializer::serialize(handle, os, tmp_mdspan); } @@ -88,11 +88,28 @@ void deserialize_mdspan( using inner_accessor_type = typename obj_t::accessor_type::accessor_type; auto tmp_mdspan = raft::host_mdspan>( - tmp, obj.extents()); + tmp.data(), obj.extents()); detail::numpy_serializer::deserialize(handle, is, tmp_mdspan); cudaStream_t stream = handle.get_stream(); raft::update_device(obj.data_handle(), tmp.data(), obj.size(), stream); } +template +void deserialize_mdspan(const raft::handle_t& handle, + std::istream& is, + raft::host_mdspan&& obj) +{ + deserialize_mdspan(handle, is, obj); +} + +template +void deserialize_mdspan( + const raft::handle_t& handle, + std::istream& is, + raft::device_mdspan&& obj) +{ + deserialize_mdspan(handle, is, obj); +} + } // end namespace raft diff --git a/cpp/include/raft/spatial/knn/detail/ann_serialization.h b/cpp/include/raft/spatial/knn/detail/ann_serialization.h index cf2aeedcfc..6aa0083d0b 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_serialization.h +++ b/cpp/include/raft/spatial/knn/detail/ann_serialization.h @@ -56,85 +56,4 @@ T read_scalar(std::ifstream& file) return value; } -template -void write_mdspan( - const raft::handle_t& handle, - std::ofstream& of, - const raft::device_mdspan& obj) -{ - using obj_t = raft::device_mdspan; - write_scalar(of, obj.rank()); - if (obj.is_exhaustive() && obj.is_unique()) { - write_scalar(of, obj.size()); - } else { - RAFT_FAIL("Cannot serialize non exhaustive mdarray"); - } - if (obj.size() > 0) { - for (typename obj_t::rank_type i = 0; i < obj.rank(); i++) - write_scalar(of, obj.extent(i)); - cudaStream_t stream = handle.get_stream(); - std::vector< - typename raft::device_mdspan::value_type> - tmp(obj.size()); - raft::update_host(tmp.data(), obj.data_handle(), obj.size(), stream); - handle.sync_stream(stream); - of.write(reinterpret_cast(tmp.data()), tmp.size() * sizeof(ElementType)); - if (of.good()) { - RAFT_LOG_DEBUG("Written %zu bytes", - static_cast(obj.size() * sizeof(obj.data_handle()[0]))); - } else { - RAFT_FAIL("Error writing mdarray to file"); - } - } else { - RAFT_LOG_DEBUG("Skipping mdspand with zero size"); - } -} - -template -void read_mdspan(const raft::handle_t& handle, - std::ifstream& file, - raft::device_mdspan& obj) -{ - using obj_t = raft::device_mdspan; - auto rank = read_scalar(file); - if (obj.rank() != rank) { RAFT_FAIL("Incorrect rank while reading mdarray"); } - auto size = read_scalar(file); - if (obj.size() != size) { - RAFT_FAIL("Incorrect rank while reading mdarray %zu vs %zu", - static_cast(size), - static_cast(obj.size())); - } - if (obj.size() > 0) { - for (typename obj_t::rank_type i = 0; i < obj.rank(); i++) { - auto ex = read_scalar(file); - if (obj.extent(i) != ex) { - RAFT_FAIL("Incorrect extent while reading mdarray %d vs %d at %d", - static_cast(ex), - static_cast(obj.extent(i)), - static_cast(i)); - } - } - cudaStream_t stream = handle.get_stream(); - std::vector tmp(obj.size()); - file.read(reinterpret_cast(tmp.data()), tmp.size() * sizeof(ElementType)); - raft::update_device(obj.data_handle(), tmp.data(), tmp.size(), stream); - handle.sync_stream(stream); - if (file.good()) { - RAFT_LOG_DEBUG("Read %zu bytes", - static_cast(obj.size() * sizeof(obj.data_handle()[0]))); - } else { - RAFT_FAIL("error reading mdarray from file"); - } - } else { - RAFT_LOG_DEBUG("Skipping mdspand with zero size"); - } -} - -template -void read_mdspan(const raft::handle_t& handle, - std::ifstream& file, - raft::device_mdspan&& obj) -{ - read_mdspan(handle, file, obj); -} } // namespace raft::spatial::knn::detail diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index 6e038db68f..c6e1943586 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -405,15 +406,15 @@ void save(const handle_t& handle, const std::string& filename, const index index index_.allocate(handle, n_rows); auto data = index_.data(); - read_mdspan(handle, infile, data); - read_mdspan(handle, infile, index_.indices()); - read_mdspan(handle, infile, index_.list_sizes()); - read_mdspan(handle, infile, index_.list_offsets()); - read_mdspan(handle, infile, index_.centers()); + deserialize_mdspan(handle, infile, data); + deserialize_mdspan(handle, infile, index_.indices()); + deserialize_mdspan(handle, infile, index_.list_sizes()); + deserialize_mdspan(handle, infile, index_.list_offsets()); + deserialize_mdspan(handle, infile, index_.centers()); bool has_norms = read_scalar(infile); if (has_norms) { if (!index_.center_norms()) { RAFT_FAIL("Error inconsistent center norms"); } else { auto center_norms = *index_.center_norms(); - read_mdspan(handle, infile, center_norms); + deserialize_mdspan(handle, infile, center_norms); } } infile.close(); diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index adbedf854f..9a8df98365 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1398,14 +1399,14 @@ void save(const handle_t& handle_, const std::string& filename, const index index handle_, metric, codebook_kind, n_lists, dim, pq_bits, pq_dim, n_nonempty_lists); index_.allocate(handle_, n_rows); - read_mdspan(handle_, infile, index_.pq_centers()); - read_mdspan(handle_, infile, index_.pq_dataset()); - read_mdspan(handle_, infile, index_.indices()); - read_mdspan(handle_, infile, index_.rotation_matrix()); - read_mdspan(handle_, infile, index_.list_offsets()); - read_mdspan(handle_, infile, index_.list_sizes()); - read_mdspan(handle_, infile, index_.centers()); - read_mdspan(handle_, infile, index_.centers_rot()); + deserialize_mdspan(handle_, infile, index_.pq_centers()); + deserialize_mdspan(handle_, infile, index_.pq_dataset()); + deserialize_mdspan(handle_, infile, index_.indices()); + deserialize_mdspan(handle_, infile, index_.rotation_matrix()); + deserialize_mdspan(handle_, infile, index_.list_offsets()); + deserialize_mdspan(handle_, infile, index_.list_sizes()); + deserialize_mdspan(handle_, infile, index_.centers()); + deserialize_mdspan(handle_, infile, index_.centers_rot()); infile.close(); From 71d1b0a834fe1317035b675e068e67e0454d54f4 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Wed, 25 Jan 2023 15:33:23 -0800 Subject: [PATCH 04/32] Fix copyright year --- cpp/include/raft/spatial/knn/detail/ann_serialization.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/spatial/knn/detail/ann_serialization.h b/cpp/include/raft/spatial/knn/detail/ann_serialization.h index 6aa0083d0b..5f499c32ea 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_serialization.h +++ b/cpp/include/raft/spatial/knn/detail/ann_serialization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 73913137491783d9866ab317cdc0459a9f739a98 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Wed, 25 Jan 2023 15:53:37 -0800 Subject: [PATCH 05/32] Solve link error due to duplicated symbols --- .../core/detail/mdspan_numpy_serializer.hpp | 101 +++++++++--------- cpp/include/raft/core/mdspan_serializer.hpp | 22 ++-- cpp/test/core/mdspan_serializer.cu | 5 +- 3 files changed, 68 insertions(+), 60 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 4607d36944..ec75abdae4 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -65,16 +65,16 @@ namespace numpy_serializer { * SOFTWARE. */ -const char little_endian_char = '<'; -const char big_endian_char = '>'; -const char no_endian_char = '|'; -const char endian_chars[] = {little_endian_char, big_endian_char, no_endian_char}; -const char numtype_chars[] = {'f', 'i', 'u', 'c'}; +#define RAFT_NUMPY_LITTLE_ENDIAN_CHAR '<' +#define RAFT_NUMPY_BIG_ENDIAN_CHAR '>' +#define RAFT_NUMPY_NO_ENDIAN_CHAR '|' +#define RAFT_NUMPY_MAGIC_STRING "\x93NUMPY" +#define RAFT_NUMPY_MAGIC_STRING_LENGTH 6 #if RAFT_SYSTEM_LITTLE_ENDIAN == 1 -const char host_endian_char = little_endian_char; -#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 -const char host_endian_char = big_endian_char; +#define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_LITTLE_ENDIAN_CHAR +#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 +#define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_BIG_ENDIAN_CHAR #endif // RAFT_SYSTEM_LITTLE_ENDIAN == 1 using ndarray_len_t = std::uint64_t; @@ -116,35 +116,37 @@ struct is_complex> : std::true_type { }; template , bool> = true> -dtype_t get_numpy_dtype() +inline dtype_t get_numpy_dtype() { - return {host_endian_char, 'f', sizeof(T)}; + return {RAFT_NUMPY_HOST_ENDIAN_CHAR, 'f', sizeof(T)}; } template && std::is_signed_v, bool> = true> -dtype_t get_numpy_dtype() +inline dtype_t get_numpy_dtype() { - const char endian_char = (sizeof(T) == 1 ? no_endian_char : host_endian_char); + const char endian_char = + (sizeof(T) == 1 ? RAFT_NUMPY_NO_ENDIAN_CHAR : RAFT_NUMPY_HOST_ENDIAN_CHAR); return {endian_char, 'i', sizeof(T)}; } template && std::is_unsigned_v, bool> = true> -dtype_t get_numpy_dtype() +inline dtype_t get_numpy_dtype() { - const char endian_char = (sizeof(T) == 1 ? no_endian_char : host_endian_char); + const char endian_char = + (sizeof(T) == 1 ? RAFT_NUMPY_NO_ENDIAN_CHAR : RAFT_NUMPY_HOST_ENDIAN_CHAR); return {endian_char, 'u', sizeof(T)}; } template {}, bool> = true> -dtype_t get_numpy_dtype() +inline dtype_t get_numpy_dtype() { - return {host_endian_char, 'c', sizeof(T)}; + return {RAFT_NUMPY_HOST_ENDIAN_CHAR, 'c', sizeof(T)}; } template -std::string tuple_to_string(const std::vector& tuple) +inline std::string tuple_to_string(const std::vector& tuple) { std::ostringstream oss; if (tuple.empty()) { @@ -161,7 +163,7 @@ std::string tuple_to_string(const std::vector& tuple) return oss.str(); } -std::string header_to_string(const header_t& header) +inline std::string header_to_string(const header_t& header) { std::ostringstream oss; oss << "{'descr': '" << header.dtype.to_string() @@ -170,7 +172,7 @@ std::string header_to_string(const header_t& header) return oss.str(); } -std::string trim(const std::string& str) +inline std::string trim(const std::string& str) { const std::string whitespace = " \t"; auto begin = str.find_first_not_of(whitespace); @@ -184,8 +186,8 @@ std::string trim(const std::string& str) // TODO(hcho3): Consider writing a proper parser // Limitation: can only parse a flat dictionary; all values are assumed to non-objects // Limitation: must know all the keys ahead of time; you get undefined behavior if you omit any key -std::map parse_pydict(std::string str, - const std::vector& keys) +inline std::map parse_pydict(std::string str, + const std::vector& keys) { std::map result; @@ -223,13 +225,13 @@ std::map parse_pydict(std::string str, return result; } -std::string parse_pystring(std::string str) +inline std::string parse_pystring(std::string str) { RAFT_EXPECTS(str.front() == '\'' && str.back() == '\'', "Invalid Python string: %s", str.c_str()); return str.substr(1, str.length() - 2); } -bool parse_pybool(std::string str) +inline bool parse_pybool(std::string str) { if (str == "True") { return true; @@ -240,7 +242,7 @@ bool parse_pybool(std::string str) } } -std::vector parse_pytuple(std::string str) +inline std::vector parse_pytuple(std::string str) { std::vector result; @@ -256,13 +258,17 @@ std::vector parse_pytuple(std::string str) return result; } -dtype_t parse_descr(std::string typestr) +inline dtype_t parse_descr(std::string typestr) { RAFT_EXPECTS(typestr.length() >= 3, "Invalid typestr: Too short"); char byteorder_c = typestr.at(0); char kind_c = typestr.at(1); std::string itemsize_s = typestr.substr(2); + const char endian_chars[] = { + RAFT_NUMPY_LITTLE_ENDIAN_CHAR, RAFT_NUMPY_BIG_ENDIAN_CHAR, RAFT_NUMPY_NO_ENDIAN_CHAR}; + const char numtype_chars[] = {'f', 'i', 'u', 'c'}; + RAFT_EXPECTS(std::find(std::begin(endian_chars), std::end(endian_chars), byteorder_c) != std::end(endian_chars), "Invalid typestr: unrecognized byteorder %c", @@ -276,12 +282,9 @@ dtype_t parse_descr(std::string typestr) return {byteorder_c, kind_c, itemsize}; } -const char magic_string[] = "\x93NUMPY"; -const std::size_t magic_string_length = 6; - -void write_magic(std::ostream& os) +inline void write_magic(std::ostream& os) { - os.write(magic_string, magic_string_length); + os.write(RAFT_NUMPY_MAGIC_STRING, RAFT_NUMPY_MAGIC_STRING_LENGTH); RAFT_EXPECTS(os.good(), "Error writing magic string"); // Use version 1.0 os.put(1); @@ -289,27 +292,27 @@ void write_magic(std::ostream& os) RAFT_EXPECTS(os.good(), "Error writing magic string"); } -void read_magic(std::istream& is) +inline void read_magic(std::istream& is) { - char magic_buf[magic_string_length + 2] = {0}; - is.read(magic_buf, magic_string_length + 2); + char magic_buf[RAFT_NUMPY_MAGIC_STRING_LENGTH + 2] = {0}; + is.read(magic_buf, RAFT_NUMPY_MAGIC_STRING_LENGTH + 2); RAFT_EXPECTS(is.good(), "Error reading magic string"); - RAFT_EXPECTS(std::memcmp(magic_buf, magic_string, magic_string_length) == 0, + RAFT_EXPECTS(std::memcmp(magic_buf, RAFT_NUMPY_MAGIC_STRING, RAFT_NUMPY_MAGIC_STRING_LENGTH) == 0, "The given stream does not have a valid NumPy format."); - std::uint8_t version_major = magic_buf[magic_string_length]; - std::uint8_t version_minor = magic_buf[magic_string_length + 1]; + std::uint8_t version_major = magic_buf[RAFT_NUMPY_MAGIC_STRING_LENGTH]; + std::uint8_t version_minor = magic_buf[RAFT_NUMPY_MAGIC_STRING_LENGTH + 1]; RAFT_EXPECTS(version_major == 1 && version_minor == 0, "Unsupported NumPy version: %d.%d", version_major, version_minor); } -void write_header(std::ostream& os, const header_t& header) +inline void write_header(std::ostream& os, const header_t& header) { std::string header_dict = header_to_string(header); - std::size_t preamble_length = magic_string_length + 2 + 2 + header_dict.length() + 1; + std::size_t preamble_length = RAFT_NUMPY_MAGIC_STRING_LENGTH + 2 + 2 + header_dict.length() + 1; RAFT_EXPECTS(preamble_length < 255 * 255, "Header too long"); std::size_t padding_len = 16 - preamble_length % 16; std::string padding(padding_len, ' '); @@ -330,7 +333,7 @@ void write_header(std::ostream& os, const header_t& header) RAFT_EXPECTS(os.good(), "Error writing header dict"); } -std::string read_header_bytes(std::istream& is) +inline std::string read_header_bytes(std::istream& is) { read_magic(is); @@ -347,7 +350,7 @@ std::string read_header_bytes(std::istream& is) return std::string(header_bytes.data(), header_length); } -header_t read_header(std::istream& is) +inline header_t read_header(std::istream& is) { std::string header_bytes = read_header_bytes(is); @@ -369,9 +372,10 @@ header_t read_header(std::istream& is) } template -void serialize(const raft::handle_t& handle, - std::ostream& os, - const raft::host_mdspan& obj) +inline void serialize( + const raft::handle_t& handle, + std::ostream& os, + const raft::host_mdspan& obj) { using obj_t = raft::host_mdspan; @@ -390,9 +394,10 @@ void serialize(const raft::handle_t& handle, } template -void deserialize(const raft::handle_t& handle, - std::istream& is, - const raft::host_mdspan& obj) +inline void deserialize( + const raft::handle_t& handle, + std::istream& is, + const raft::host_mdspan& obj) { using obj_t = raft::host_mdspan; @@ -414,9 +419,9 @@ void deserialize(const raft::handle_t& handle, obj.rank(), header.shape.size()); for (typename obj_t::rank_type i = 0; i < obj.rank(); ++i) { - RAFT_EXPECTS(obj.extent(i) == header.shape[i], + RAFT_EXPECTS(static_cast(obj.extent(i)) == header.shape[i], "Incorrect dimension: expected %zu but got %zu", - obj.extent(i), + static_cast(obj.extent(i)), header.shape[i]); } diff --git a/cpp/include/raft/core/mdspan_serializer.hpp b/cpp/include/raft/core/mdspan_serializer.hpp index 9e685e76b4..77c7f8ef33 100644 --- a/cpp/include/raft/core/mdspan_serializer.hpp +++ b/cpp/include/raft/core/mdspan_serializer.hpp @@ -26,7 +26,7 @@ namespace raft { template -void serialize_mdspan( +inline void serialize_mdspan( const raft::handle_t& handle, std::ostream& os, const raft::host_mdspan& obj) @@ -38,7 +38,7 @@ void serialize_mdspan( } template -void serialize_mdspan( +inline void serialize_mdspan( const raft::handle_t& handle, std::ostream& os, const raft::device_mdspan& obj) @@ -61,9 +61,10 @@ void serialize_mdspan( } template -void deserialize_mdspan(const raft::handle_t& handle, - std::istream& is, - raft::host_mdspan& obj) +inline void deserialize_mdspan( + const raft::handle_t& handle, + std::istream& is, + raft::host_mdspan& obj) { static_assert(std::is_same_v || std::is_same_v, @@ -72,7 +73,7 @@ void deserialize_mdspan(const raft::handle_t& handle, } template -void deserialize_mdspan( +inline void deserialize_mdspan( const raft::handle_t& handle, std::istream& is, raft::device_mdspan& obj) @@ -96,15 +97,16 @@ void deserialize_mdspan( } template -void deserialize_mdspan(const raft::handle_t& handle, - std::istream& is, - raft::host_mdspan&& obj) +inline void deserialize_mdspan( + const raft::handle_t& handle, + std::istream& is, + raft::host_mdspan&& obj) { deserialize_mdspan(handle, is, obj); } template -void deserialize_mdspan( +inline void deserialize_mdspan( const raft::handle_t& handle, std::istream& is, raft::device_mdspan&& obj) diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index 29c9da5fed..156f09b5ae 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -68,8 +68,9 @@ TEST(MDArraySerializer, E2ERoundTrip) TEST(MDArraySerializer, HeaderRoundTrip) { - for (char byteorder : detail::numpy_serializer::endian_chars) { - for (char kind : detail::numpy_serializer::numtype_chars) { + for (char byteorder : std::vector{ + RAFT_NUMPY_LITTLE_ENDIAN_CHAR, RAFT_NUMPY_BIG_ENDIAN_CHAR, RAFT_NUMPY_NO_ENDIAN_CHAR}) { + for (char kind : std::vector{'f', 'i', 'u', 'c'}) { for (unsigned int itemsize : std::vector{1, 2, 4, 8, 16}) { for (bool fortran_order : std::vector{true, false}) { for (const auto& shape : From 03efe03befe24dfdb441712c41b53993ef5023b4 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Wed, 25 Jan 2023 16:35:41 -0800 Subject: [PATCH 06/32] Add gtest to test serializing device_mdspan --- cpp/test/core/mdspan_serializer.cu | 33 ++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index 156f09b5ae..62628e5746 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -23,20 +23,22 @@ #include #include #include +#include +#include #include namespace raft { -template -void test_mdspan_roundtrip(const raft::handle_t& handle, std::vector& vec, Args... dims) +template +void test_mdspan_roundtrip(const raft::handle_t& handle, VectorType& vec, Args... dims) { - std::vector vec2(vec.size()); + VectorType vec2(vec.size()); - auto span = MDSpanType(vec.data(), dims...); + auto span = MDSpanType(thrust::raw_pointer_cast(vec.data()), dims...); std::ostringstream oss; serialize_mdspan(handle, oss, span); - auto span2 = MDSpanType(vec2.data(), dims...); + auto span2 = MDSpanType(thrust::raw_pointer_cast(vec2.data()), dims...); std::istringstream iss(oss.str()); deserialize_mdspan(handle, iss, span2); EXPECT_EQ(vec, vec2); @@ -45,7 +47,7 @@ void test_mdspan_roundtrip(const raft::handle_t& handle, std::vector& vec TEST(MDArraySerializer, E2ERoundTrip) { raft::handle_t handle{}; - std::vector vec{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + thrust::host_vector vec = std::vector{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; using mdspan_matrix2d_c_layout = raft::host_mdspan, raft::layout_c_contiguous>; @@ -64,6 +66,25 @@ TEST(MDArraySerializer, E2ERoundTrip) test_mdspan_roundtrip(handle, vec, 2, 2, 2); test_mdspan_roundtrip(handle, vec, 1, 2, 4); test_mdspan_roundtrip(handle, vec, 1, 2, 4); + + using device_mdspan_matrix2d_c_layout = + raft::device_mdspan, raft::layout_c_contiguous>; + using device_mdspan_matrix2d_f_layout = + raft::device_mdspan, raft::layout_f_contiguous>; + using device_mdspan_matrix3d_c_layout = + raft::device_mdspan, raft::layout_c_contiguous>; + using device_mdspan_matrix3d_f_layout = + raft::device_mdspan, raft::layout_f_contiguous>; + + thrust::device_vector d_vec(vec); + test_mdspan_roundtrip(handle, d_vec, 2, 4); + test_mdspan_roundtrip(handle, d_vec, 2, 4); + test_mdspan_roundtrip(handle, d_vec, 1, 8); + test_mdspan_roundtrip(handle, d_vec, 1, 8); + test_mdspan_roundtrip(handle, d_vec, 2, 2, 2); + test_mdspan_roundtrip(handle, d_vec, 2, 2, 2); + test_mdspan_roundtrip(handle, d_vec, 1, 2, 4); + test_mdspan_roundtrip(handle, d_vec, 1, 2, 4); } TEST(MDArraySerializer, HeaderRoundTrip) From 1b22df488ca44e27e0fa148273b7c51b41e3ceba Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Wed, 25 Jan 2023 19:51:22 -0800 Subject: [PATCH 07/32] Rename header to serialize.hpp --- cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp | 2 +- .../raft/core/{mdspan_serializer.hpp => serialize.hpp} | 4 ++++ cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh | 2 +- cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh | 2 +- cpp/test/core/mdspan_serializer.cu | 2 +- 5 files changed, 8 insertions(+), 4 deletions(-) rename cpp/include/raft/core/{mdspan_serializer.hpp => serialize.hpp} (98%) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index ec75abdae4..6b32b04ae8 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -35,7 +35,7 @@ namespace detail { namespace numpy_serializer { -/* +/** * A small implementation of NumPy serialization format. * Reference: https://numpy.org/doc/1.13/neps/npy-format.html * diff --git a/cpp/include/raft/core/mdspan_serializer.hpp b/cpp/include/raft/core/serialize.hpp similarity index 98% rename from cpp/include/raft/core/mdspan_serializer.hpp rename to cpp/include/raft/core/serialize.hpp index 77c7f8ef33..0811c234eb 100644 --- a/cpp/include/raft/core/mdspan_serializer.hpp +++ b/cpp/include/raft/core/serialize.hpp @@ -23,6 +23,10 @@ #include #include +/** + * Collection of serialization functions for RAFT data types + */ + namespace raft { template diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index c6e1943586..ac52657c4b 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -24,9 +24,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index 9a8df98365..8771fbc9c2 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -25,9 +25,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index 62628e5746..2341547680 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include From cf13fd5c03d1f5177c45d26e780936d2aaa0c064 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Wed, 25 Jan 2023 20:30:40 -0800 Subject: [PATCH 08/32] Implement scalar serializer --- .../core/detail/mdspan_numpy_serializer.hpp | 13 +++- cpp/include/raft/core/serialize.hpp | 23 +++++++- .../spatial/knn/detail/ann_serialization.h | 59 ------------------- .../spatial/knn/detail/ivf_flat_build.cuh | 38 ++++++------ .../raft/spatial/knn/detail/ivf_pq_build.cuh | 41 ++++++------- cpp/test/core/mdspan_serializer.cu | 13 ++-- 6 files changed, 81 insertions(+), 106 deletions(-) delete mode 100644 cpp/include/raft/spatial/knn/detail/ann_serialization.h diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 6b32b04ae8..2f9bc66203 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -16,14 +16,15 @@ #pragma once +#include +#include +#include + #include #include #include #include #include -#include -#include -#include #include #include #include @@ -145,6 +146,12 @@ inline dtype_t get_numpy_dtype() return {RAFT_NUMPY_HOST_ENDIAN_CHAR, 'c', sizeof(T)}; } +template , bool> = true> +inline dtype_t get_numpy_dtype() +{ + return get_numpy_dtype>(); +} + template inline std::string tuple_to_string(const std::vector& tuple) { diff --git a/cpp/include/raft/core/serialize.hpp b/cpp/include/raft/core/serialize.hpp index 0811c234eb..f31d6bb681 100644 --- a/cpp/include/raft/core/serialize.hpp +++ b/cpp/include/raft/core/serialize.hpp @@ -16,11 +16,12 @@ #pragma once -#include #include #include #include #include + +#include #include /** @@ -118,4 +119,24 @@ inline void deserialize_mdspan( deserialize_mdspan(handle, is, obj); } +template +void serialize_scalar(const raft::handle_t& handle, std::ostream& os, const T& value) +{ + using mdspan_1d_c_layout = + raft::host_mdspan, raft::layout_c_contiguous>; + auto tmp_mdspan = mdspan_1d_c_layout(&value, 1); + serialize_mdspan(handle, os, tmp_mdspan); +} + +template +T deserialize_scalar(const raft::handle_t& handle, std::istream& is) +{ + T value; + using mdspan_1d_c_layout = + raft::host_mdspan, raft::layout_c_contiguous>; + auto tmp_mdspan = mdspan_1d_c_layout(&value, 1); + deserialize_mdspan(handle, is, tmp_mdspan); + return value; +} + } // end namespace raft diff --git a/cpp/include/raft/spatial/knn/detail/ann_serialization.h b/cpp/include/raft/spatial/knn/detail/ann_serialization.h deleted file mode 100644 index 5f499c32ea..0000000000 --- a/cpp/include/raft/spatial/knn/detail/ann_serialization.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace raft::spatial::knn::detail { - -template -void write_scalar(std::ofstream& of, const T& value) -{ - of.write((char*)&value, sizeof value); - if (of.good()) { - RAFT_LOG_DEBUG("Written %z bytes", (sizeof value)); - } else { - RAFT_FAIL("error writing value to file"); - } -} - -template -T read_scalar(std::ifstream& file) -{ - T value; - file.read((char*)&value, sizeof value); - if (file.good()) { - RAFT_LOG_DEBUG("Read %z bytes", (sizeof value)); - } else { - RAFT_FAIL("error reading value from file"); - } - return value; -} - -} // namespace raft::spatial::knn::detail diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index ac52657c4b..ade0bdc5eb 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -18,7 +18,6 @@ #include "../ivf_flat_types.hpp" #include "ann_kmeans_balanced.cuh" -#include "ann_serialization.h" #include "ann_utils.cuh" #include @@ -35,6 +34,9 @@ #include +#include +#include + namespace raft::spatial::knn::ivf_flat::detail { using namespace raft::spatial::knn::detail; // NOLINT @@ -399,13 +401,13 @@ void save(const handle_t& handle, const std::string& filename, const index(index_.size()), index_.dim()); - write_scalar(of, serialization_version); - write_scalar(of, index_.size()); - write_scalar(of, index_.dim()); - write_scalar(of, index_.n_lists()); - write_scalar(of, index_.metric()); - write_scalar(of, index_.veclen()); - write_scalar(of, index_.adaptive_centers()); + serialize_scalar(handle, of, serialization_version); + serialize_scalar(handle, of, index_.size()); + serialize_scalar(handle, of, index_.dim()); + serialize_scalar(handle, of, index_.n_lists()); + serialize_scalar(handle, of, index_.metric()); + serialize_scalar(handle, of, index_.veclen()); + serialize_scalar(handle, of, index_.adaptive_centers()); serialize_mdspan(handle, of, index_.data()); serialize_mdspan(handle, of, index_.indices()); serialize_mdspan(handle, of, index_.list_sizes()); @@ -413,11 +415,11 @@ void save(const handle_t& handle, const std::string& filename, const index index if (!infile) { RAFT_FAIL("Cannot open %s", filename.c_str()); } - auto ver = read_scalar(infile); + auto ver = deserialize_scalar(handle, infile); if (ver != serialization_version) { RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver); } - auto n_rows = read_scalar(infile); - auto dim = read_scalar(infile); - auto n_lists = read_scalar(infile); - auto metric = read_scalar(infile); - auto veclen = read_scalar(infile); - bool adaptive_centers = read_scalar(infile); + auto n_rows = deserialize_scalar(handle, infile); + auto dim = deserialize_scalar(handle, infile); + auto n_lists = deserialize_scalar(handle, infile); + auto metric = deserialize_scalar(handle, infile); + auto veclen = deserialize_scalar(handle, infile); + bool adaptive_centers = deserialize_scalar(handle, infile); index index_ = raft::spatial::knn::ivf_flat::index(handle, metric, n_lists, adaptive_centers, dim); @@ -460,7 +462,7 @@ auto load(const handle_t& handle, const std::string& filename) -> index deserialize_mdspan(handle, infile, index_.list_sizes()); deserialize_mdspan(handle, infile, index_.list_offsets()); deserialize_mdspan(handle, infile, index_.centers()); - bool has_norms = read_scalar(infile); + bool has_norms = deserialize_scalar(handle, infile); if (has_norms) { if (!index_.center_norms()) { RAFT_FAIL("Error inconsistent center norms"); diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index 8771fbc9c2..e094527af4 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -17,7 +17,6 @@ #pragma once #include "ann_kmeans_balanced.cuh" -#include "ann_serialization.h" #include "ann_utils.cuh" #include @@ -56,6 +55,8 @@ #include #include +#include +#include #include namespace raft::spatial::knn::ivf_pq::detail { @@ -1388,16 +1389,16 @@ void save(const handle_t& handle_, const std::string& filename, const index(index_.pq_dim()), static_cast(index_.pq_bits())); - write_scalar(of, serialization_version); - write_scalar(of, index_.size()); - write_scalar(of, index_.dim()); - write_scalar(of, index_.pq_bits()); - write_scalar(of, index_.pq_dim()); + serialize_scalar(handle_, of, serialization_version); + serialize_scalar(handle_, of, index_.size()); + serialize_scalar(handle_, of, index_.dim()); + serialize_scalar(handle_, of, index_.pq_bits()); + serialize_scalar(handle_, of, index_.pq_dim()); - write_scalar(of, index_.metric()); - write_scalar(of, index_.codebook_kind()); - write_scalar(of, index_.n_lists()); - write_scalar(of, index_.n_nonempty_lists()); + serialize_scalar(handle_, of, index_.metric()); + serialize_scalar(handle_, of, index_.codebook_kind()); + serialize_scalar(handle_, of, index_.n_lists()); + serialize_scalar(handle_, of, index_.n_nonempty_lists()); serialize_mdspan(handle_, of, index_.pq_centers()); serialize_mdspan(handle_, of, index_.pq_dataset()); @@ -1430,22 +1431,22 @@ auto load(const handle_t& handle_, const std::string& filename) -> index if (!infile) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } - auto ver = read_scalar(infile); + auto ver = deserialize_scalar(handle_, infile); if (ver != serialization_version) { RAFT_FAIL("serialization version mismatch %d vs. %d", ver, serialization_version); } - auto n_rows = read_scalar(infile); - auto dim = read_scalar(infile); - auto pq_bits = read_scalar(infile); - auto pq_dim = read_scalar(infile); + auto n_rows = deserialize_scalar(handle_, infile); + auto dim = deserialize_scalar(handle_, infile); + auto pq_bits = deserialize_scalar(handle_, infile); + auto pq_dim = deserialize_scalar(handle_, infile); - auto metric = read_scalar(infile); - auto codebook_kind = read_scalar(infile); - auto n_lists = read_scalar(infile); - auto n_nonempty_lists = read_scalar(infile); + auto metric = deserialize_scalar(handle_, infile); + auto codebook_kind = deserialize_scalar(handle_, infile); + auto n_lists = deserialize_scalar(handle_, infile); + auto n_nonempty_lists = deserialize_scalar(handle_, infile); RAFT_LOG_DEBUG("n_rows %zu, dim %d, pq_dim %d, pq_bits %d, n_lists %d", - static_cast(n_rows), + static_cast(n_rows), static_cast(dim), static_cast(pq_dim), static_cast(pq_bits), diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index 2341547680..77defd00e3 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -14,17 +14,20 @@ * limitations under the License. */ -#include -#include -#include #include + #include #include #include -#include -#include + #include #include + +#include +#include +#include +#include +#include #include namespace raft { From d9062b25f11f52cf412419bffb2ad103b606daa3 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 08:32:14 -0800 Subject: [PATCH 09/32] Use device_resources instead of handle_t --- .../core/detail/mdspan_numpy_serializer.hpp | 6 +++--- cpp/include/raft/core/serialize.hpp | 18 +++++++++--------- cpp/test/core/mdspan_serializer.cu | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 2f9bc66203..5574abddb5 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -17,7 +17,7 @@ #pragma once #include -#include +#include #include #include @@ -380,7 +380,7 @@ inline header_t read_header(std::istream& is) template inline void serialize( - const raft::handle_t& handle, + const raft::device_resources& handle, std::ostream& os, const raft::host_mdspan& obj) { @@ -402,7 +402,7 @@ inline void serialize( template inline void deserialize( - const raft::handle_t& handle, + const raft::device_resources& handle, std::istream& is, const raft::host_mdspan& obj) { diff --git a/cpp/include/raft/core/serialize.hpp b/cpp/include/raft/core/serialize.hpp index f31d6bb681..7a0c0f7970 100644 --- a/cpp/include/raft/core/serialize.hpp +++ b/cpp/include/raft/core/serialize.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -32,7 +32,7 @@ namespace raft { template inline void serialize_mdspan( - const raft::handle_t& handle, + const raft::device_resources& handle, std::ostream& os, const raft::host_mdspan& obj) { @@ -44,7 +44,7 @@ inline void serialize_mdspan( template inline void serialize_mdspan( - const raft::handle_t& handle, + const raft::device_resources& handle, std::ostream& os, const raft::device_mdspan& obj) { @@ -67,7 +67,7 @@ inline void serialize_mdspan( template inline void deserialize_mdspan( - const raft::handle_t& handle, + const raft::device_resources& handle, std::istream& is, raft::host_mdspan& obj) { @@ -79,7 +79,7 @@ inline void deserialize_mdspan( template inline void deserialize_mdspan( - const raft::handle_t& handle, + const raft::device_resources& handle, std::istream& is, raft::device_mdspan& obj) { @@ -103,7 +103,7 @@ inline void deserialize_mdspan( template inline void deserialize_mdspan( - const raft::handle_t& handle, + const raft::device_resources& handle, std::istream& is, raft::host_mdspan&& obj) { @@ -112,7 +112,7 @@ inline void deserialize_mdspan( template inline void deserialize_mdspan( - const raft::handle_t& handle, + const raft::device_resources& handle, std::istream& is, raft::device_mdspan&& obj) { @@ -120,7 +120,7 @@ inline void deserialize_mdspan( } template -void serialize_scalar(const raft::handle_t& handle, std::ostream& os, const T& value) +void serialize_scalar(const raft::device_resources& handle, std::ostream& os, const T& value) { using mdspan_1d_c_layout = raft::host_mdspan, raft::layout_c_contiguous>; @@ -129,7 +129,7 @@ void serialize_scalar(const raft::handle_t& handle, std::ostream& os, const T& v } template -T deserialize_scalar(const raft::handle_t& handle, std::istream& is) +T deserialize_scalar(const raft::device_resources& handle, std::istream& is) { T value; using mdspan_1d_c_layout = diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index 77defd00e3..c6a1ec8c7b 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -16,7 +16,7 @@ #include -#include +#include #include #include @@ -33,7 +33,7 @@ namespace raft { template -void test_mdspan_roundtrip(const raft::handle_t& handle, VectorType& vec, Args... dims) +void test_mdspan_roundtrip(const raft::device_resources& handle, VectorType& vec, Args... dims) { VectorType vec2(vec.size()); @@ -49,7 +49,7 @@ void test_mdspan_roundtrip(const raft::handle_t& handle, VectorType& vec, Args.. TEST(MDArraySerializer, E2ERoundTrip) { - raft::handle_t handle{}; + raft::device_resources handle{}; thrust::host_vector vec = std::vector{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; using mdspan_matrix2d_c_layout = From b3477d05663633cc93838ad2244be9ec4bbd76f5 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 09:27:16 -0800 Subject: [PATCH 10/32] Use 64-bit alignment --- .../raft/core/detail/mdspan_numpy_serializer.hpp | 5 +++-- cpp/test/core/mdspan_serializer.cu | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 5574abddb5..9e27eeba30 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -38,7 +38,7 @@ namespace numpy_serializer { /** * A small implementation of NumPy serialization format. - * Reference: https://numpy.org/doc/1.13/neps/npy-format.html + * Reference: https://numpy.org/doc/1.23/reference/generated/numpy.lib.format.html * * Adapted from https://github.com/llohse/libnpy/blob/master/include/npy.hpp, using the following * license: @@ -321,7 +321,8 @@ inline void write_header(std::ostream& os, const header_t& header) std::string header_dict = header_to_string(header); std::size_t preamble_length = RAFT_NUMPY_MAGIC_STRING_LENGTH + 2 + 2 + header_dict.length() + 1; RAFT_EXPECTS(preamble_length < 255 * 255, "Header too long"); - std::size_t padding_len = 16 - preamble_length % 16; + // Enforce 64-byte alignment + std::size_t padding_len = 64 - preamble_length % 64; std::string padding(padding_len, ' '); write_magic(os); diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index c6a1ec8c7b..6afb9723b6 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -180,10 +180,14 @@ TEST(MDArraySerializer, WriteHeader) detail::numpy_serializer::write_header(oss, header); EXPECT_EQ(oss.str(), "\x93NUMPY\x01\x00"s // magic string + version (1.0) - "\x46\x00"s // HEADER_LEN = 70, in little endian + "\x76\x00"s // HEADER_LEN = 118, in little endian "{'descr': ' Date: Thu, 26 Jan 2023 09:29:14 -0800 Subject: [PATCH 11/32] Move static_assert --- cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp | 8 ++++++++ cpp/include/raft/core/serialize.hpp | 6 ------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 9e27eeba30..6a492a788e 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -385,6 +385,10 @@ inline void serialize( std::ostream& os, const raft::host_mdspan& obj) { + static_assert(std::is_same_v || + std::is_same_v, + "The serializer only supports row-major and column-major layouts"); + using obj_t = raft::host_mdspan; const auto dtype = get_numpy_dtype(); @@ -407,6 +411,10 @@ inline void deserialize( std::istream& is, const raft::host_mdspan& obj) { + static_assert(std::is_same_v || + std::is_same_v, + "The serializer only supports row-major and column-major layouts"); + using obj_t = raft::host_mdspan; // Check if given dtype and fortran_order are compatible with the mdspan diff --git a/cpp/include/raft/core/serialize.hpp b/cpp/include/raft/core/serialize.hpp index 7a0c0f7970..810aef96d2 100644 --- a/cpp/include/raft/core/serialize.hpp +++ b/cpp/include/raft/core/serialize.hpp @@ -36,9 +36,6 @@ inline void serialize_mdspan( std::ostream& os, const raft::host_mdspan& obj) { - static_assert(std::is_same_v || - std::is_same_v, - "The serializer only supports row-major and column-major layouts"); detail::numpy_serializer::serialize(handle, os, obj); } @@ -71,9 +68,6 @@ inline void deserialize_mdspan( std::istream& is, raft::host_mdspan& obj) { - static_assert(std::is_same_v || - std::is_same_v, - "The serializer only supports row-major and column-major layouts"); detail::numpy_serializer::deserialize(handle, is, obj); } From 1770f1226fcb4e30668e8dd77b60dce6aee3fbe3 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 09:31:05 -0800 Subject: [PATCH 12/32] Ensure data have finished copying --- cpp/include/raft/core/serialize.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/include/raft/core/serialize.hpp b/cpp/include/raft/core/serialize.hpp index 810aef96d2..ea97ebb79c 100644 --- a/cpp/include/raft/core/serialize.hpp +++ b/cpp/include/raft/core/serialize.hpp @@ -55,6 +55,7 @@ inline void serialize_mdspan( std::vector tmp(obj.size()); cudaStream_t stream = handle.get_stream(); raft::update_host(tmp.data(), obj.data_handle(), obj.size(), stream); + handle.sync_stream(); using inner_accessor_type = typename obj_t::accessor_type::accessor_type; auto tmp_mdspan = raft::host_mdspan>( @@ -93,6 +94,7 @@ inline void deserialize_mdspan( cudaStream_t stream = handle.get_stream(); raft::update_device(obj.data_handle(), tmp.data(), obj.size(), stream); + handle.sync_stream(); } template From f53957abd8318ee38762e0ae40a1d4fcee98072f Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 10:11:26 -0800 Subject: [PATCH 13/32] Serialize scalars as 0D NumPy array --- .../core/detail/mdspan_numpy_serializer.hpp | 36 ++++++++++++++++--- cpp/include/raft/core/serialize.hpp | 28 ++++++--------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 6a492a788e..47865822d0 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -380,8 +380,7 @@ inline header_t read_header(std::istream& is) } template -inline void serialize( - const raft::device_resources& handle, +inline void serialize_host_mdspan( std::ostream& os, const raft::host_mdspan& obj) { @@ -405,9 +404,20 @@ inline void serialize( RAFT_EXPECTS(os.good(), "Error writing content of mdspan"); } +template +inline void serialize_scalar(std::ostream& os, const T& value) +{ + const auto dtype = get_numpy_dtype(); + const bool fortran_order = false; + const std::vector shape{}; + const header_t header = {dtype, fortran_order, shape}; + write_header(os, header); + os.write(reinterpret_cast(&value), sizeof(T)); + RAFT_EXPECTS(os.good(), "Error serializing a scalar"); +} + template -inline void deserialize( - const raft::device_resources& handle, +inline void deserialize_host_mdspan( std::istream& is, const raft::host_mdspan& obj) { @@ -446,6 +456,24 @@ inline void deserialize( RAFT_EXPECTS(is.good(), "Error while reading mdspan content"); } +template +inline T deserialize_scalar(std::istream& is) +{ + // Check if dtype is correct + const auto expected_dtype = get_numpy_dtype(); + header_t header = read_header(is); + RAFT_EXPECTS(header.dtype == expected_dtype, + "Expected dtype %s but got %s instead", + header.dtype.to_string().c_str(), + expected_dtype.to_string().c_str()); + // Check if dimensions are correct; shape should be () + RAFT_EXPECTS(header.shape.empty(), "Incorrect rank: expected 0 but got %zu", header.shape.size()); + + T value; + is.read(reinterpret_cast(&value), sizeof(T)); + RAFT_EXPECTS(is.good(), "Error while deserializing scalar"); +} + } // end namespace numpy_serializer } // end namespace detail } // end namespace raft diff --git a/cpp/include/raft/core/serialize.hpp b/cpp/include/raft/core/serialize.hpp index ea97ebb79c..de4728605a 100644 --- a/cpp/include/raft/core/serialize.hpp +++ b/cpp/include/raft/core/serialize.hpp @@ -32,11 +32,11 @@ namespace raft { template inline void serialize_mdspan( - const raft::device_resources& handle, + const raft::device_resources&, std::ostream& os, const raft::host_mdspan& obj) { - detail::numpy_serializer::serialize(handle, os, obj); + detail::numpy_serializer::serialize_host_mdspan(os, obj); } template @@ -60,16 +60,16 @@ inline void serialize_mdspan( auto tmp_mdspan = raft::host_mdspan>( tmp.data(), obj.extents()); - detail::numpy_serializer::serialize(handle, os, tmp_mdspan); + detail::numpy_serializer::serialize_host_mdspan(os, tmp_mdspan); } template inline void deserialize_mdspan( - const raft::device_resources& handle, + const raft::device_resources&, std::istream& is, raft::host_mdspan& obj) { - detail::numpy_serializer::deserialize(handle, is, obj); + detail::numpy_serializer::deserialize_host_mdspan(is, obj); } template @@ -90,7 +90,7 @@ inline void deserialize_mdspan( auto tmp_mdspan = raft::host_mdspan>( tmp.data(), obj.extents()); - detail::numpy_serializer::deserialize(handle, is, tmp_mdspan); + detail::numpy_serializer::deserialize_host_mdspan(is, tmp_mdspan); cudaStream_t stream = handle.get_stream(); raft::update_device(obj.data_handle(), tmp.data(), obj.size(), stream); @@ -116,23 +116,15 @@ inline void deserialize_mdspan( } template -void serialize_scalar(const raft::device_resources& handle, std::ostream& os, const T& value) +inline void serialize_scalar(const raft::device_resources&, std::ostream& os, const T& value) { - using mdspan_1d_c_layout = - raft::host_mdspan, raft::layout_c_contiguous>; - auto tmp_mdspan = mdspan_1d_c_layout(&value, 1); - serialize_mdspan(handle, os, tmp_mdspan); + detail::numpy_serializer::serialize_scalar(os, value); } template -T deserialize_scalar(const raft::device_resources& handle, std::istream& is) +inline T deserialize_scalar(const raft::device_resources&, std::istream& is) { - T value; - using mdspan_1d_c_layout = - raft::host_mdspan, raft::layout_c_contiguous>; - auto tmp_mdspan = mdspan_1d_c_layout(&value, 1); - deserialize_mdspan(handle, is, tmp_mdspan); - return value; + return detail::numpy_serializer::deserialize_scalar(is); } } // end namespace raft From 2cf24e5bf250e3d463ca9dfd8049d5553cb15cc2 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 10:28:34 -0800 Subject: [PATCH 14/32] Check endianness when deserializing --- cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 47865822d0..2391dc5e1c 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -376,6 +376,14 @@ inline header_t read_header(std::istream& is) shape.push_back(static_cast(std::stoul(e))); } + RAFT_EXPECTS( + descr.byteorder == RAFT_NUMPY_HOST_ENDIAN_CHAR, + "The mdspan was serialized on a %s machine but you're attempting to load it on " + "a %s machine. This use case is not currently supported.", + (RAFT_NUMPY_HOST_ENDIAN_CHAR == RAFT_NUMPY_LITTLE_ENDIAN_CHAR ? "big-endian" : "little-endian"), + (RAFT_NUMPY_HOST_ENDIAN_CHAR == RAFT_NUMPY_LITTLE_ENDIAN_CHAR ? "little-endian" + : "big-endian")); + return {descr, fortran_order, shape}; } From 3f129d4c7d9e774aead8da116f2f5f6b5be0229c Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 13:41:03 -0800 Subject: [PATCH 15/32] Add missing return --- cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 2391dc5e1c..b3c26c3b53 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -480,6 +480,7 @@ inline T deserialize_scalar(std::istream& is) T value; is.read(reinterpret_cast(&value), sizeof(T)); RAFT_EXPECTS(is.good(), "Error while deserializing scalar"); + return value; } } // end namespace numpy_serializer From b1d0a75f97ed3aff8b460b4c43f44ebab4b85fec Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 14:07:16 -0800 Subject: [PATCH 16/32] Fix gtest --- cpp/test/core/mdspan_serializer.cu | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/mdspan_serializer.cu index 6afb9723b6..8981c1dfce 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/mdspan_serializer.cu @@ -92,22 +92,19 @@ TEST(MDArraySerializer, E2ERoundTrip) TEST(MDArraySerializer, HeaderRoundTrip) { - for (char byteorder : std::vector{ - RAFT_NUMPY_LITTLE_ENDIAN_CHAR, RAFT_NUMPY_BIG_ENDIAN_CHAR, RAFT_NUMPY_NO_ENDIAN_CHAR}) { - for (char kind : std::vector{'f', 'i', 'u', 'c'}) { - for (unsigned int itemsize : std::vector{1, 2, 4, 8, 16}) { - for (bool fortran_order : std::vector{true, false}) { - for (const auto& shape : - std::vector>{ - {10}, {2, 2}, {10, 30, 100}, {}}) { - detail::numpy_serializer::dtype_t dtype{byteorder, kind, itemsize}; - detail::numpy_serializer::header_t header{dtype, fortran_order, shape}; - std::ostringstream oss; - detail::numpy_serializer::write_header(oss, header); - std::istringstream iss(oss.str()); - auto header2 = detail::numpy_serializer::read_header(iss); - EXPECT_EQ(header, header2); - } + char byteorder = RAFT_NUMPY_HOST_ENDIAN_CHAR; + for (char kind : std::vector{'f', 'i', 'u', 'c'}) { + for (unsigned int itemsize : std::vector{1, 2, 4, 8, 16}) { + for (bool fortran_order : std::vector{true, false}) { + for (const auto& shape : std::vector>{ + {10}, {2, 2}, {10, 30, 100}, {}}) { + detail::numpy_serializer::dtype_t dtype{byteorder, kind, itemsize}; + detail::numpy_serializer::header_t header{dtype, fortran_order, shape}; + std::ostringstream oss; + detail::numpy_serializer::write_header(oss, header); + std::istringstream iss(oss.str()); + auto header2 = detail::numpy_serializer::read_header(iss); + EXPECT_EQ(header, header2); } } } From f2f75c05d642caa78bfc132d0ef7852c6d2adb6f Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 14:09:31 -0800 Subject: [PATCH 17/32] Rename gtest file to numpy_serializer.cu --- cpp/test/CMakeLists.txt | 2 +- ...{mdspan_serializer.cu => numpy_serializer.cu} | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) rename cpp/test/core/{mdspan_serializer.cu => numpy_serializer.cu} (96%) diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 97eebc8e0d..113a2ae58b 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -96,7 +96,7 @@ if(BUILD_TESTS) test/core/nvtx.cpp test/core/mdarray.cu test/core/mdspan_utils.cu - test/core/mdspan_serializer.cu + test/core/numpy_serializer.cu test/core/memory_type.cpp test/core/span.cpp test/core/span.cu diff --git a/cpp/test/core/mdspan_serializer.cu b/cpp/test/core/numpy_serializer.cu similarity index 96% rename from cpp/test/core/mdspan_serializer.cu rename to cpp/test/core/numpy_serializer.cu index 8981c1dfce..81b5f52d7f 100644 --- a/cpp/test/core/mdspan_serializer.cu +++ b/cpp/test/core/numpy_serializer.cu @@ -47,7 +47,7 @@ void test_mdspan_roundtrip(const raft::device_resources& handle, VectorType& vec EXPECT_EQ(vec, vec2); } -TEST(MDArraySerializer, E2ERoundTrip) +TEST(NumPySerializerMDSpan, E2ERoundTrip) { raft::device_resources handle{}; thrust::host_vector vec = std::vector{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; @@ -90,7 +90,7 @@ TEST(MDArraySerializer, E2ERoundTrip) test_mdspan_roundtrip(handle, d_vec, 1, 2, 4); } -TEST(MDArraySerializer, HeaderRoundTrip) +TEST(NumPySerializerMDSpan, HeaderRoundTrip) { char byteorder = RAFT_NUMPY_HOST_ENDIAN_CHAR; for (char kind : std::vector{'f', 'i', 'u', 'c'}) { @@ -111,7 +111,7 @@ TEST(MDArraySerializer, HeaderRoundTrip) } } -TEST(MDArraySerializer, Tuple2String) +TEST(NumPySerializerMDSpan, Tuple2String) { { std::vector tuple{}; @@ -131,7 +131,7 @@ TEST(MDArraySerializer, Tuple2String) } } -TEST(MDArraySerializer, NumPyDType) +TEST(NumPySerializerMDSpan, NumPyDType) { const char expected_endian_char = RAFT_SYSTEM_LITTLE_ENDIAN ? '<' : '>'; { @@ -169,7 +169,7 @@ TEST(MDArraySerializer, NumPyDType) } } -TEST(MDArraySerializer, WriteHeader) +TEST(NumPySerializerMDSpan, WriteHeader) { using namespace std::string_literals; std::ostringstream oss; @@ -187,7 +187,7 @@ TEST(MDArraySerializer, WriteHeader) "\x20\x20\x20\x20\x20\x20\n"s); } -TEST(MDArraySerializer, ParsePyDict) +TEST(NumPySerializerMDSpan, ParsePyDict) { std::string dict{"{'apple': 2, 'pie': 'is', 'delicious': True, 'piece of': 'cake'}"}; auto parse = @@ -197,12 +197,12 @@ TEST(MDArraySerializer, ParsePyDict) EXPECT_EQ(parse, expected_parse); } -TEST(MDArraySerializer, ParsePyString) +TEST(NumPySerializerMDSpan, ParsePyString) { EXPECT_EQ(detail::numpy_serializer::parse_pystring("'foobar'"), "foobar"); } -TEST(MDArraySerializer, ParsePyTuple) +TEST(NumPySerializerMDSpan, ParsePyTuple) { { std::string tuple_str{"(2,)"}; From 63cd19e5346662b845df7dbdd42a0bb95efd9859 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 14:16:36 -0800 Subject: [PATCH 18/32] Bump up serialization version for IVF --- cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh | 7 ++++++- cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index 9d551ded17..2de1de0f79 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -383,7 +383,12 @@ inline void fill_refinement_index(raft::device_resources const& handle, RAFT_CUDA_TRY(cudaPeekAtLastError()); } -static const int serialization_version = 1; +// Serialization version 2 +// No backward compatibility yet; that is, can't add additional fields without breaking +// backward compatibility. +// TODO(hcho3) Implement next-gen serializer for IVF that allows for expansion in a backward +// compatible fashion. +static const int serialization_version = 2; /** * Save the index to file. diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index ed85de5695..42506dd605 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -1367,7 +1367,12 @@ auto build(raft::device_resources const& handle, } } -static const int serialization_version = 1; +// Serialization version 2 +// No backward compatibility yet; that is, can't add additional fields without breaking +// backward compatibility. +// TODO(hcho3) Implement next-gen serializer for IVF that allows for expansion in a backward +// compatible fashion. +static const int serialization_version = 2; /** * Save the index to file. From b4ff38e92a9f68cc8e25f482f155dc5e1cfe7ecb Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 14:18:45 -0800 Subject: [PATCH 19/32] Fix endian check, to account for 1-byte types --- cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index b3c26c3b53..df89811636 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -377,12 +377,11 @@ inline header_t read_header(std::istream& is) } RAFT_EXPECTS( - descr.byteorder == RAFT_NUMPY_HOST_ENDIAN_CHAR, + descr.byteorder == RAFT_NUMPY_HOST_ENDIAN_CHAR || descr.byteorder == RAFT_NUMPY_NO_ENDIAN_CHAR, "The mdspan was serialized on a %s machine but you're attempting to load it on " "a %s machine. This use case is not currently supported.", - (RAFT_NUMPY_HOST_ENDIAN_CHAR == RAFT_NUMPY_LITTLE_ENDIAN_CHAR ? "big-endian" : "little-endian"), - (RAFT_NUMPY_HOST_ENDIAN_CHAR == RAFT_NUMPY_LITTLE_ENDIAN_CHAR ? "little-endian" - : "big-endian")); + (RAFT_SYSTEM_LITTLE_ENDIAN ? "big-endian" : "little-endian"), + (RAFT_SYSTEM_LITTLE_ENDIAN ? "little-endian" : "big-endian")); return {descr, fortran_order, shape}; } From 145bdde326b7782d200707397642a19261c15c13 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 20:39:18 -0800 Subject: [PATCH 20/32] Test RoundTrip for multiple data types --- cpp/test/core/numpy_serializer.cu | 34 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/test/core/numpy_serializer.cu index 81b5f52d7f..f2479f8a40 100644 --- a/cpp/test/core/numpy_serializer.cu +++ b/cpp/test/core/numpy_serializer.cu @@ -25,7 +25,6 @@ #include #include -#include #include #include #include @@ -47,19 +46,20 @@ void test_mdspan_roundtrip(const raft::device_resources& handle, VectorType& vec EXPECT_EQ(vec, vec2); } -TEST(NumPySerializerMDSpan, E2ERoundTrip) +template +void run_roundtrip_test_mdspan_serializer() { raft::device_resources handle{}; - thrust::host_vector vec = std::vector{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + thrust::host_vector vec = std::vector{1, 2, 3, 4, 5, 6, 7, 8}; using mdspan_matrix2d_c_layout = - raft::host_mdspan, raft::layout_c_contiguous>; + raft::host_mdspan, raft::layout_c_contiguous>; using mdspan_matrix2d_f_layout = - raft::host_mdspan, raft::layout_f_contiguous>; + raft::host_mdspan, raft::layout_f_contiguous>; using mdspan_matrix3d_c_layout = - raft::host_mdspan, raft::layout_c_contiguous>; + raft::host_mdspan, raft::layout_c_contiguous>; using mdspan_matrix3d_f_layout = - raft::host_mdspan, raft::layout_f_contiguous>; + raft::host_mdspan, raft::layout_f_contiguous>; test_mdspan_roundtrip(handle, vec, 2, 4); test_mdspan_roundtrip(handle, vec, 2, 4); @@ -71,15 +71,15 @@ TEST(NumPySerializerMDSpan, E2ERoundTrip) test_mdspan_roundtrip(handle, vec, 1, 2, 4); using device_mdspan_matrix2d_c_layout = - raft::device_mdspan, raft::layout_c_contiguous>; + raft::device_mdspan, raft::layout_c_contiguous>; using device_mdspan_matrix2d_f_layout = - raft::device_mdspan, raft::layout_f_contiguous>; + raft::device_mdspan, raft::layout_f_contiguous>; using device_mdspan_matrix3d_c_layout = - raft::device_mdspan, raft::layout_c_contiguous>; + raft::device_mdspan, raft::layout_c_contiguous>; using device_mdspan_matrix3d_f_layout = - raft::device_mdspan, raft::layout_f_contiguous>; + raft::device_mdspan, raft::layout_f_contiguous>; - thrust::device_vector d_vec(vec); + thrust::device_vector d_vec(vec); test_mdspan_roundtrip(handle, d_vec, 2, 4); test_mdspan_roundtrip(handle, d_vec, 2, 4); test_mdspan_roundtrip(handle, d_vec, 1, 8); @@ -90,6 +90,16 @@ TEST(NumPySerializerMDSpan, E2ERoundTrip) test_mdspan_roundtrip(handle, d_vec, 1, 2, 4); } +TEST(NumPySerializerMDSpan, E2ERoundTrip) +{ + run_roundtrip_test_mdspan_serializer(); + run_roundtrip_test_mdspan_serializer(); + run_roundtrip_test_mdspan_serializer(); + run_roundtrip_test_mdspan_serializer(); + run_roundtrip_test_mdspan_serializer>(); + run_roundtrip_test_mdspan_serializer>(); +} + TEST(NumPySerializerMDSpan, HeaderRoundTrip) { char byteorder = RAFT_NUMPY_HOST_ENDIAN_CHAR; From b64146c41fc53058875a7178c2a191c05a431ecf Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 21:00:12 -0800 Subject: [PATCH 21/32] Add gtest for scalars --- cpp/test/core/numpy_serializer.cu | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/test/core/numpy_serializer.cu index f2479f8a40..c7482b9a94 100644 --- a/cpp/test/core/numpy_serializer.cu +++ b/cpp/test/core/numpy_serializer.cu @@ -231,4 +231,45 @@ TEST(NumPySerializerMDSpan, ParsePyTuple) } } +template +void run_roundtrip_test_scalar_serializer(T scalar) +{ + std::ostringstream oss; + detail::numpy_serializer::serialize_scalar(oss, scalar); + std::istringstream iss(oss.str()); + T tmp = detail::numpy_serializer::deserialize_scalar(iss); + EXPECT_EQ(scalar, tmp); +} + +TEST(NumPySerializerScalar, E2ERoundTrip) +{ + using namespace std::complex_literals; + run_roundtrip_test_scalar_serializer(2.0f); + run_roundtrip_test_scalar_serializer(-2.0); + run_roundtrip_test_scalar_serializer(-2); + run_roundtrip_test_scalar_serializer(0x4FFFFFF); + run_roundtrip_test_scalar_serializer>(1.0 - 2.0i); +} + +template +void check_header_scalar_serializer(T scalar) +{ + std::ostringstream oss; + detail::numpy_serializer::serialize_scalar(oss, scalar); + std::istringstream iss(oss.str()); + detail::numpy_serializer::header_t header = detail::numpy_serializer::read_header(iss); + EXPECT_TRUE(header.shape.empty()); + EXPECT_EQ(header.dtype.to_string(), detail::numpy_serializer::get_numpy_dtype().to_string()); +} + +TEST(NumPySerializerScalar, HeaderCheck) +{ + using namespace std::complex_literals; + check_header_scalar_serializer(2.0f); + check_header_scalar_serializer(-2.0); + check_header_scalar_serializer(-2); + check_header_scalar_serializer(0x4FFFFFF); + check_header_scalar_serializer>(1.0 - 2.0i); +} + } // namespace raft From 3a94fef15e52620e226a861ff0bfcc4ff24eca67 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 21:01:56 -0800 Subject: [PATCH 22/32] Slim down gtest NumPySerializerMDSpan.E2ERoundTrip --- cpp/test/core/numpy_serializer.cu | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/test/core/numpy_serializer.cu index c7482b9a94..7f55fdb7fa 100644 --- a/cpp/test/core/numpy_serializer.cu +++ b/cpp/test/core/numpy_serializer.cu @@ -56,38 +56,18 @@ void run_roundtrip_test_mdspan_serializer() raft::host_mdspan, raft::layout_c_contiguous>; using mdspan_matrix2d_f_layout = raft::host_mdspan, raft::layout_f_contiguous>; - using mdspan_matrix3d_c_layout = - raft::host_mdspan, raft::layout_c_contiguous>; - using mdspan_matrix3d_f_layout = - raft::host_mdspan, raft::layout_f_contiguous>; test_mdspan_roundtrip(handle, vec, 2, 4); test_mdspan_roundtrip(handle, vec, 2, 4); - test_mdspan_roundtrip(handle, vec, 1, 8); - test_mdspan_roundtrip(handle, vec, 1, 8); - test_mdspan_roundtrip(handle, vec, 2, 2, 2); - test_mdspan_roundtrip(handle, vec, 2, 2, 2); - test_mdspan_roundtrip(handle, vec, 1, 2, 4); - test_mdspan_roundtrip(handle, vec, 1, 2, 4); - using device_mdspan_matrix2d_c_layout = - raft::device_mdspan, raft::layout_c_contiguous>; - using device_mdspan_matrix2d_f_layout = - raft::device_mdspan, raft::layout_f_contiguous>; using device_mdspan_matrix3d_c_layout = raft::device_mdspan, raft::layout_c_contiguous>; using device_mdspan_matrix3d_f_layout = raft::device_mdspan, raft::layout_f_contiguous>; thrust::device_vector d_vec(vec); - test_mdspan_roundtrip(handle, d_vec, 2, 4); - test_mdspan_roundtrip(handle, d_vec, 2, 4); - test_mdspan_roundtrip(handle, d_vec, 1, 8); - test_mdspan_roundtrip(handle, d_vec, 1, 8); test_mdspan_roundtrip(handle, d_vec, 2, 2, 2); test_mdspan_roundtrip(handle, d_vec, 2, 2, 2); - test_mdspan_roundtrip(handle, d_vec, 1, 2, 4); - test_mdspan_roundtrip(handle, d_vec, 1, 2, 4); } TEST(NumPySerializerMDSpan, E2ERoundTrip) @@ -97,7 +77,6 @@ TEST(NumPySerializerMDSpan, E2ERoundTrip) run_roundtrip_test_mdspan_serializer(); run_roundtrip_test_mdspan_serializer(); run_roundtrip_test_mdspan_serializer>(); - run_roundtrip_test_mdspan_serializer>(); } TEST(NumPySerializerMDSpan, HeaderRoundTrip) From 65fdf53bb8977353d7261610eeaa78d72a368e93 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Thu, 26 Jan 2023 22:41:08 -0800 Subject: [PATCH 23/32] Set up Pytest for mdspan serialization --- .../pylibraft/pylibraft/common/CMakeLists.txt | 4 +- .../pylibraft/pylibraft/common/cpp/mdspan.pxd | 33 +++- python/pylibraft/pylibraft/common/mdspan.pyx | 146 ++++++++++++++++++ .../pylibraft/test/test_mdspan_serializer.py | 26 ++++ 4 files changed, 204 insertions(+), 5 deletions(-) create mode 100644 python/pylibraft/pylibraft/common/mdspan.pyx create mode 100644 python/pylibraft/pylibraft/test/test_mdspan_serializer.py diff --git a/python/pylibraft/pylibraft/common/CMakeLists.txt b/python/pylibraft/pylibraft/common/CMakeLists.txt index 3b49cef429..6ce1dfe347 100644 --- a/python/pylibraft/pylibraft/common/CMakeLists.txt +++ b/python/pylibraft/pylibraft/common/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -13,7 +13,7 @@ # ============================================================================= # Set the list of Cython files to build -set(cython_sources cuda.pyx handle.pyx interruptible.pyx) +set(cython_sources cuda.pyx handle.pyx mdspan.pyx interruptible.pyx) set(linked_libraries raft::raft) # Build all of the Cython targets diff --git a/python/pylibraft/pylibraft/common/cpp/mdspan.pxd b/python/pylibraft/pylibraft/common/cpp/mdspan.pxd index d4d0dd8f35..c3e5abb47e 100644 --- a/python/pylibraft/pylibraft/common/cpp/mdspan.pxd +++ b/python/pylibraft/pylibraft/common/cpp/mdspan.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,8 +19,9 @@ # cython: embedsignature = True # cython: language_level = 3 -import pylibraft.common.handle -from cython.operator cimport dereference as deref +from libcpp.string cimport string + +from pylibraft.common.handle cimport device_resources cdef extern from "raft/thirdparty/mdspan/include/experimental/__p0009_bits/layout_stride.hpp" namespace "std::experimental": # noqa: E501 @@ -35,6 +36,8 @@ cdef extern from "raft/core/mdspan_types.hpp" \ namespace "raft": ctypedef layout_right row_major ctypedef layout_left col_major + cdef cppclass matrix_extent[IndexType]: + pass cdef extern from "raft/core/device_mdspan.hpp" namespace "raft" nogil: @@ -73,6 +76,9 @@ cdef extern from "raft/core/host_mdspan.hpp" \ cdef cppclass host_scalar_view[ElementType, IndexType]: pass + cdef cppclass host_mdspan[ElementType, Extents, LayoutPolicy]: + pass + cdef host_matrix_view[ElementType, IndexType, LayoutPolicy] \ make_host_matrix_view[ElementType, IndexType, LayoutPolicy]( ElementType* ptr, IndexType n_rows, IndexType n_cols) except + @@ -84,3 +90,24 @@ cdef extern from "raft/core/host_mdspan.hpp" \ cdef host_scalar_view[ElementType, IndexType] \ make_host_scalar_view[ElementType, IndexType]( ElementType *ptr) except + + +cdef extern from "" namespace "std" nogil: + cdef cppclass ostringstream: + ostringstream() except + + string str() except + + + +cdef extern from "" namespace "std" nogil: + + cdef cppclass ostream: + pass + +cdef extern from "raft/core/mdspan.hpp" namespace "raft" nogil: + cdef cppclass dextents[IndentType, Rank]: + pass + +cdef extern from "raft/core/serialize.hpp" namespace "raft" nogil: + + cdef void serialize_mdspan[ElementType, Extents, LayoutPolicy]( + const device_resources& handle, ostream& os, + const host_mdspan[ElementType, Extents, LayoutPolicy]& obj) diff --git a/python/pylibraft/pylibraft/common/mdspan.pyx b/python/pylibraft/pylibraft/common/mdspan.pyx new file mode 100644 index 0000000000..ec825495f4 --- /dev/null +++ b/python/pylibraft/pylibraft/common/mdspan.pyx @@ -0,0 +1,146 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import io + +import numpy as np + +from cpython.object cimport PyObject +from cython.operator cimport dereference as deref +from libc.stddef cimport size_t +from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t, uintptr_t + +from pylibraft.common.cpp.mdspan cimport ( + col_major, + host_mdspan, + make_host_matrix_view, + matrix_extent, + ostream, + ostringstream, + row_major, + serialize_mdspan, +) +from pylibraft.common.handle cimport device_resources + +from pylibraft.common import DeviceResources + + +cdef extern from "Python.h": + Py_buffer* PyMemoryView_GET_BUFFER(PyObject* mview) + + +def run_roundtrip_test_for_mdspan(X, fortran_order=False): + if not isinstance(X, np.ndarray) or len(X.shape) != 2: + raise ValueError("Please call this function with a NumPy array with" + "2 dimensions") + handle = DeviceResources() + cdef device_resources * handle_ = \ + handle.getHandle() + cdef ostringstream oss + if X.dtype == np.float32: + if fortran_order: + serialize_mdspan[float, matrix_extent[size_t], col_major]( + deref(handle_), + oss, + + make_host_matrix_view[float, size_t, col_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + else: + serialize_mdspan[float, matrix_extent[size_t], row_major]( + deref(handle_), + oss, + + make_host_matrix_view[float, size_t, row_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + elif X.dtype == np.float64: + if fortran_order: + serialize_mdspan[double, matrix_extent[size_t], col_major]( + deref(handle_), + oss, + + make_host_matrix_view[double, size_t, col_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + else: + serialize_mdspan[double, matrix_extent[size_t], row_major]( + deref(handle_), + oss, + + make_host_matrix_view[double, size_t, row_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + elif X.dtype == np.int32: + if fortran_order: + serialize_mdspan[int32_t, matrix_extent[size_t], col_major]( + deref(handle_), + oss, + + make_host_matrix_view[int32_t, size_t, col_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + else: + serialize_mdspan[int32_t, matrix_extent[size_t], row_major]( + deref(handle_), + oss, + + make_host_matrix_view[int32_t, size_t, row_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + elif X.dtype == np.uint32: + if fortran_order: + serialize_mdspan[uint32_t, matrix_extent[size_t], col_major]( + deref(handle_), + oss, + + make_host_matrix_view[uint32_t, size_t, col_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + else: + serialize_mdspan[uint32_t, matrix_extent[size_t], row_major]( + deref(handle_), + oss, + + make_host_matrix_view[uint32_t, size_t, row_major]( + PyMemoryView_GET_BUFFER( + X.data).buf, + X.shape[0], X.shape[1])) + else: + raise NotImplementedError() + f = io.BytesIO(oss.str()) + X2 = np.load(f) + assert np.all(X.shape == X2.shape) + assert np.all(X == X2) diff --git a/python/pylibraft/pylibraft/test/test_mdspan_serializer.py b/python/pylibraft/pylibraft/test/test_mdspan_serializer.py new file mode 100644 index 0000000000..412cf676d0 --- /dev/null +++ b/python/pylibraft/pylibraft/test/test_mdspan_serializer.py @@ -0,0 +1,26 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest + +from pylibraft.common.mdspan import run_roundtrip_test_for_mdspan + + +# TODO(hcho3): Set up hypothesis +@pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "uint32"]) +def test_mdspan_serializer(dtype): + X = np.random.random_sample((2, 3)).astype(dtype) + run_roundtrip_test_for_mdspan(X) From 30733f38df5911c9d2644da70c5871313b61e855 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Mon, 30 Jan 2023 21:53:37 -0800 Subject: [PATCH 24/32] Rename load/save to (de)serialize --- .../raft/spatial/knn/detail/ann_utils.cuh | 4 ++-- .../raft/spatial/knn/detail/ivf_flat_build.cuh | 9 +++++---- .../raft/spatial/knn/detail/ivf_pq_build.cuh | 8 ++++---- cpp/include/raft_runtime/neighbors/ivf_pq.hpp | 12 ++++++------ cpp/src/distance/neighbors/ivfpq_build.cu | 16 ++++++++-------- cpp/test/neighbors/ann_ivf_flat.cu | 4 ++-- cpp/test/neighbors/ann_ivf_pq.cuh | 4 ++-- .../pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 12 ++++++------ .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 4 ++-- 9 files changed, 37 insertions(+), 36 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index 395714a161..585853ece5 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -520,12 +520,12 @@ struct batch_load_iterator { } [[nodiscard]] auto operator*() const -> reference { - cur_batch_->load(cur_pos_); + cur_batch_->deserialize(cur_pos_); return *cur_batch_; } [[nodiscard]] auto operator->() const -> pointer { - cur_batch_->load(cur_pos_); + cur_batch_->deserialize(cur_pos_); return cur_batch_.get(); } friend auto operator==(const batch_load_iterator& x, const batch_load_iterator& y) -> bool diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index 2de1de0f79..499a8ce039 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -401,9 +401,9 @@ static const int serialization_version = 2; * */ template -void save(raft::device_resources const& handle, - const std::string& filename, - const index& index_) +void serialize(raft::device_resources const& handle, + const std::string& filename, + const index& index_) { std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open %s", filename.c_str()); } @@ -444,7 +444,8 @@ void save(raft::device_resources const& handle, * */ template -auto load(raft::device_resources const& handle, const std::string& filename) -> index +auto deserialize(raft::device_resources const& handle, const std::string& filename) + -> index { std::ifstream infile(filename, std::ios::in | std::ios::binary); diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index 42506dd605..87a6bd73be 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -1385,9 +1385,9 @@ static const int serialization_version = 2; * */ template -void save(raft::device_resources const& handle_, - const std::string& filename, - const index& index_) +void serialize(raft::device_resources const& handle_, + const std::string& filename, + const index& index_) { std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } @@ -1434,7 +1434,7 @@ void save(raft::device_resources const& handle_, * */ template -auto load(raft::device_resources const& handle_, const std::string& filename) -> index +auto deserialize(raft::device_resources const& handle_, const std::string& filename) -> index { std::ifstream infile(filename, std::ios::in | std::ios::binary); diff --git a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp index cae32c9530..59d0b59128 100644 --- a/cpp/include/raft_runtime/neighbors/ivf_pq.hpp +++ b/cpp/include/raft_runtime/neighbors/ivf_pq.hpp @@ -84,9 +84,9 @@ RAFT_INST_BUILD_EXTEND(uint8_t, uint64_t) * @param[in] index IVF-PQ index * */ -void save(raft::device_resources const& handle, - const std::string& filename, - const raft::neighbors::ivf_pq::index& index); +void serialize(raft::device_resources const& handle, + const std::string& filename, + const raft::neighbors::ivf_pq::index& index); /** * Load index from file. @@ -98,8 +98,8 @@ void save(raft::device_resources const& handle, * @param[in] index IVF-PQ index * */ -void load(raft::device_resources const& handle, - const std::string& filename, - raft::neighbors::ivf_pq::index* index); +void deserialize(raft::device_resources const& handle, + const std::string& filename, + raft::neighbors::ivf_pq::index* index); } // namespace raft::runtime::neighbors::ivf_pq diff --git a/cpp/src/distance/neighbors/ivfpq_build.cu b/cpp/src/distance/neighbors/ivfpq_build.cu index 650767f918..31e304835b 100644 --- a/cpp/src/distance/neighbors/ivfpq_build.cu +++ b/cpp/src/distance/neighbors/ivfpq_build.cu @@ -64,18 +64,18 @@ RAFT_INST_BUILD_EXTEND(uint8_t, uint64_t); #undef RAFT_INST_BUILD_EXTEND -void save(raft::device_resources const& handle, - const std::string& filename, - const raft::neighbors::ivf_pq::index& index) +void serialize(raft::device_resources const& handle, + const std::string& filename, + const raft::neighbors::ivf_pq::index& index) { - raft::spatial::knn::ivf_pq::detail::save(handle, filename, index); + raft::spatial::knn::ivf_pq::detail::serialize(handle, filename, index); }; -void load(raft::device_resources const& handle, - const std::string& filename, - raft::neighbors::ivf_pq::index* index) +void deserialize(raft::device_resources const& handle, + const std::string& filename, + raft::neighbors::ivf_pq::index* index) { if (!index) { RAFT_FAIL("Invalid index pointer"); } - *index = raft::spatial::knn::ivf_pq::detail::load(handle, filename); + *index = raft::spatial::knn::ivf_pq::detail::deserialize(handle, filename); }; } // namespace raft::runtime::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_flat.cu b/cpp/test/neighbors/ann_ivf_flat.cu index 8ccbe39889..98cc11c24e 100644 --- a/cpp/test/neighbors/ann_ivf_flat.cu +++ b/cpp/test/neighbors/ann_ivf_flat.cu @@ -188,10 +188,10 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { indices_ivfflat_dev.data(), ps.num_queries, ps.k); auto dists_out_view = raft::make_device_matrix_view( distances_ivfflat_dev.data(), ps.num_queries, ps.k); - raft::spatial::knn::ivf_flat::detail::save(handle_, "ivf_flat_index", index_2); + raft::spatial::knn::ivf_flat::detail::serialize(handle_, "ivf_flat_index", index_2); auto index_loaded = - raft::spatial::knn::ivf_flat::detail::load(handle_, "ivf_flat_index"); + raft::spatial::knn::ivf_flat::detail::deserialize(handle_, "ivf_flat_index"); ivf_flat::search(handle_, index_loaded, diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 488041f527..31261871c1 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -215,9 +215,9 @@ class ivf_pq_test : public ::testing::TestWithParam { { { auto index = build_index(); - raft::spatial::knn::ivf_pq::detail::save(handle_, "ivf_pq_index", index); + raft::spatial::knn::ivf_pq::detail::serialize(handle_, "ivf_pq_index", index); } - auto index = raft::spatial::knn::ivf_pq::detail::load(handle_, "ivf_pq_index"); + auto index = raft::spatial::knn::ivf_pq::detail::deserialize(handle_, "ivf_pq_index"); size_t queries_size = ps.num_queries * ps.k; std::vector indices_ivf_pq(queries_size); diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 3a286868bf..c56c3e9d9b 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -177,10 +177,10 @@ cdef extern from "raft_runtime/neighbors/ivf_pq.hpp" \ float* distances, device_memory_resource* mr) except + - cdef void save(const device_resources& handle, - const string& filename, - const index[uint64_t]& index) except + + cdef void serialize(const device_resources& handle, + const string& filename, + const index[uint64_t]& index) except + - cdef void load(const device_resources& handle, - const string& filename, - index[uint64_t]* index) except + + cdef void deserialize(const device_resources& handle, + const string& filename, + index[uint64_t]* index) except + diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 42f508c969..e7b69ddbea 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -795,7 +795,7 @@ def save(filename, Index index, handle=None): cdef string c_filename = filename.encode('utf-8') - c_ivf_pq.save(deref(handle_), c_filename, deref(index.index)) + c_ivf_pq.serialize(deref(handle_), c_filename, deref(index.index)) @auto_sync_handle @@ -852,7 +852,7 @@ def load(filename, handle=None): cdef string c_filename = filename.encode('utf-8') index = Index() - c_ivf_pq.load(deref(handle_), c_filename, index.index) + c_ivf_pq.deserialize(deref(handle_), c_filename, index.index) index.trained = True return index From 4c025de30481d34355c393bfddd42f0b68fe90e8 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Mon, 30 Jan 2023 22:16:32 -0800 Subject: [PATCH 25/32] Don't export dextents from raft --- cpp/include/raft/core/mdspan.hpp | 3 --- cpp/test/core/numpy_serializer.cu | 15 +++++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/include/raft/core/mdspan.hpp b/cpp/include/raft/core/mdspan.hpp index 0b30e9a73a..1ba6843716 100644 --- a/cpp/include/raft/core/mdspan.hpp +++ b/cpp/include/raft/core/mdspan.hpp @@ -33,9 +33,6 @@ template > using mdspan = std::experimental::mdspan; -template -using dextents = std::experimental::dextents; - namespace detail { // keeping ByteAlignment as optional to allow testing diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/test/core/numpy_serializer.cu index 7f55fdb7fa..856cceea80 100644 --- a/cpp/test/core/numpy_serializer.cu +++ b/cpp/test/core/numpy_serializer.cu @@ -29,6 +29,13 @@ #include #include +namespace { + +template +using dextents = std::experimental::dextents; + +} // anonymous namespace + namespace raft { template @@ -53,17 +60,17 @@ void run_roundtrip_test_mdspan_serializer() thrust::host_vector vec = std::vector{1, 2, 3, 4, 5, 6, 7, 8}; using mdspan_matrix2d_c_layout = - raft::host_mdspan, raft::layout_c_contiguous>; + raft::host_mdspan, raft::layout_c_contiguous>; using mdspan_matrix2d_f_layout = - raft::host_mdspan, raft::layout_f_contiguous>; + raft::host_mdspan, raft::layout_f_contiguous>; test_mdspan_roundtrip(handle, vec, 2, 4); test_mdspan_roundtrip(handle, vec, 2, 4); using device_mdspan_matrix3d_c_layout = - raft::device_mdspan, raft::layout_c_contiguous>; + raft::device_mdspan, raft::layout_c_contiguous>; using device_mdspan_matrix3d_f_layout = - raft::device_mdspan, raft::layout_f_contiguous>; + raft::device_mdspan, raft::layout_f_contiguous>; thrust::device_vector d_vec(vec); test_mdspan_roundtrip(handle, d_vec, 2, 2, 2); From 0196f00416e11cf1b0060ef40764d7cdb4659dbe Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Mon, 30 Jan 2023 22:17:39 -0800 Subject: [PATCH 26/32] Make serialization_version a constexpr --- cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh | 2 +- cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index 499a8ce039..8d56578763 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -388,7 +388,7 @@ inline void fill_refinement_index(raft::device_resources const& handle, // backward compatibility. // TODO(hcho3) Implement next-gen serializer for IVF that allows for expansion in a backward // compatible fashion. -static const int serialization_version = 2; +constexpr int serialization_version = 2; /** * Save the index to file. diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index 87a6bd73be..2c6c84e928 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -1372,7 +1372,7 @@ auto build(raft::device_resources const& handle, // backward compatibility. // TODO(hcho3) Implement next-gen serializer for IVF that allows for expansion in a backward // compatible fashion. -static const int serialization_version = 2; +constexpr int serialization_version = 2; /** * Save the index to file. From 1cd63a84846d1ec02b7b29369e9eb501db2d55d6 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Mon, 30 Jan 2023 22:23:16 -0800 Subject: [PATCH 27/32] Implement serializer for managed_mdspan --- cpp/include/raft/core/serialize.hpp | 37 +++++++++++++++++++++++++++++ cpp/test/core/numpy_serializer.cu | 10 ++++++++ 2 files changed, 47 insertions(+) diff --git a/cpp/include/raft/core/serialize.hpp b/cpp/include/raft/core/serialize.hpp index de4728605a..05814e2845 100644 --- a/cpp/include/raft/core/serialize.hpp +++ b/cpp/include/raft/core/serialize.hpp @@ -63,6 +63,20 @@ inline void serialize_mdspan( detail::numpy_serializer::serialize_host_mdspan(os, tmp_mdspan); } +template +inline void serialize_mdspan( + const raft::device_resources&, + std::ostream& os, + const raft::managed_mdspan& obj) +{ + using obj_t = raft::managed_mdspan; + using inner_accessor_type = typename obj_t::accessor_type::accessor_type; + auto tmp_mdspan = + raft::host_mdspan>( + obj.data_handle(), obj.extents()); + detail::numpy_serializer::serialize_host_mdspan(os, tmp_mdspan); +} + template inline void deserialize_mdspan( const raft::device_resources&, @@ -106,6 +120,29 @@ inline void deserialize_mdspan( deserialize_mdspan(handle, is, obj); } +template +inline void deserialize_mdspan( + const raft::device_resources& handle, + std::istream& is, + raft::managed_mdspan& obj) +{ + using obj_t = raft::managed_mdspan; + using inner_accessor_type = typename obj_t::accessor_type::accessor_type; + auto tmp_mdspan = + raft::host_mdspan>( + obj.data_handle(), obj.extents()); + detail::numpy_serializer::deserialize_host_mdspan(is, tmp_mdspan); +} + +template +inline void deserialize_mdspan( + const raft::device_resources& handle, + std::istream& is, + raft::managed_mdspan&& obj) +{ + deserialize_mdspan(handle, is, obj); +} + template inline void deserialize_mdspan( const raft::device_resources& handle, diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/test/core/numpy_serializer.cu index 856cceea80..fde99240ca 100644 --- a/cpp/test/core/numpy_serializer.cu +++ b/cpp/test/core/numpy_serializer.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -107,6 +108,15 @@ TEST(NumPySerializerMDSpan, HeaderRoundTrip) } } +TEST(NumPySerializerMDSpan, ManagedMDSpan) +{ + raft::device_resources handle{}; + thrust::universal_vector vec = std::vector{1, 2, 3, 4, 5, 6, 7, 8}; + using managed_mdspan_matrix2d_c_layout = + raft::managed_mdspan, raft::layout_c_contiguous>; + test_mdspan_roundtrip(handle, vec, 2, 2, 2); +} + TEST(NumPySerializerMDSpan, Tuple2String) { { From a91c64a10cb02be60ef0be5b4b9b9b7588855b1a Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Tue, 31 Jan 2023 10:04:47 -0800 Subject: [PATCH 28/32] Fix build error --- cpp/include/raft/spatial/knn/detail/ann_utils.cuh | 4 ++-- cpp/test/core/numpy_serializer.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index 585853ece5..395714a161 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -520,12 +520,12 @@ struct batch_load_iterator { } [[nodiscard]] auto operator*() const -> reference { - cur_batch_->deserialize(cur_pos_); + cur_batch_->load(cur_pos_); return *cur_batch_; } [[nodiscard]] auto operator->() const -> pointer { - cur_batch_->deserialize(cur_pos_); + cur_batch_->load(cur_pos_); return cur_batch_.get(); } friend auto operator==(const batch_load_iterator& x, const batch_load_iterator& y) -> bool diff --git a/cpp/test/core/numpy_serializer.cu b/cpp/test/core/numpy_serializer.cu index fde99240ca..4131a33171 100644 --- a/cpp/test/core/numpy_serializer.cu +++ b/cpp/test/core/numpy_serializer.cu @@ -113,7 +113,7 @@ TEST(NumPySerializerMDSpan, ManagedMDSpan) raft::device_resources handle{}; thrust::universal_vector vec = std::vector{1, 2, 3, 4, 5, 6, 7, 8}; using managed_mdspan_matrix2d_c_layout = - raft::managed_mdspan, raft::layout_c_contiguous>; + raft::managed_mdspan, raft::layout_c_contiguous>; test_mdspan_roundtrip(handle, vec, 2, 2, 2); } From cf83274b4d48ae218dccf03fb32ebfde85025f9d Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Tue, 31 Jan 2023 10:20:56 -0800 Subject: [PATCH 29/32] Revert spurious copyright year updates --- cpp/bench/cluster/kmeans_balanced.cu | 2 +- cpp/include/raft/cluster/kmeans_balanced.cuh | 2 +- cpp/include/raft/cluster/kmeans_balanced_types.hpp | 2 +- cpp/include/raft/core/mdspan.hpp | 2 +- cpp/include/raft/util/cudart_utils.hpp | 2 +- cpp/include/raft/util/device_atomics.cuh | 2 +- cpp/test/cluster/kmeans_balanced.cu | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/bench/cluster/kmeans_balanced.cu b/cpp/bench/cluster/kmeans_balanced.cu index 705021ddcd..9c53e86d8c 100644 --- a/cpp/bench/cluster/kmeans_balanced.cu +++ b/cpp/bench/cluster/kmeans_balanced.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/cluster/kmeans_balanced.cuh b/cpp/include/raft/cluster/kmeans_balanced.cuh index ac4f1f9acb..405c7a8018 100644 --- a/cpp/include/raft/cluster/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/kmeans_balanced.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/cluster/kmeans_balanced_types.hpp b/cpp/include/raft/cluster/kmeans_balanced_types.hpp index 0c9588ebcb..11b77e288a 100644 --- a/cpp/include/raft/cluster/kmeans_balanced_types.hpp +++ b/cpp/include/raft/cluster/kmeans_balanced_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/core/mdspan.hpp b/cpp/include/raft/core/mdspan.hpp index 1ba6843716..786ce69f89 100644 --- a/cpp/include/raft/core/mdspan.hpp +++ b/cpp/include/raft/core/mdspan.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/util/cudart_utils.hpp b/cpp/include/raft/util/cudart_utils.hpp index 1c9793eb0a..5bead2c29d 100644 --- a/cpp/include/raft/util/cudart_utils.hpp +++ b/cpp/include/raft/util/cudart_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/util/device_atomics.cuh b/cpp/include/raft/util/device_atomics.cuh index 14856bed8e..73c02c9e39 100644 --- a/cpp/include/raft/util/device_atomics.cuh +++ b/cpp/include/raft/util/device_atomics.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/cluster/kmeans_balanced.cu b/cpp/test/cluster/kmeans_balanced.cu index df559c9232..028819563e 100644 --- a/cpp/test/cluster/kmeans_balanced.cu +++ b/cpp/test/cluster/kmeans_balanced.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 3cf5a79ec23909f9c4b474fe24ceed68fb380f9b Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Tue, 31 Jan 2023 10:21:42 -0800 Subject: [PATCH 30/32] Revert spurious copyright year updates --- cpp/include/raft/cluster/kmeans_types.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/cluster/kmeans_types.hpp b/cpp/include/raft/cluster/kmeans_types.hpp index f557414d64..4d956ad7a0 100644 --- a/cpp/include/raft/cluster/kmeans_types.hpp +++ b/cpp/include/raft/cluster/kmeans_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 3bd6f1337bd82c700433d2ed2ada809b2afadca5 Mon Sep 17 00:00:00 2001 From: Hyunsu Philip Cho Date: Tue, 31 Jan 2023 10:32:54 -0800 Subject: [PATCH 31/32] Add static_assert about size --- cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh | 4 ++++ cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index 272ee42cf3..c417a97531 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -396,6 +396,10 @@ inline void fill_refinement_index(raft::device_resources const& handle, // compatible fashion. constexpr int serialization_version = 2; +static_assert(sizeof(index) == 408, + "The size of the index struct has changed since the last update; " + "paste in the new size and consider updating the save/load logic"); + /** * Save the index to file. * diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index 8b962391b7..66a4207b20 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -1397,6 +1397,10 @@ auto build(raft::device_resources const& handle, // compatible fashion. constexpr int serialization_version = 2; +static_assert(sizeof(index) == 560, + "The size of the index struct has changed since the last update; " + "paste in the new size and consider updating the save/load logic"); + /** * Save the index to file. * From 9bb00434c0e023d85949eacd2aa4031c6913309a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 1 Feb 2023 16:43:05 -0500 Subject: [PATCH 32/32] Setting parallel_level-8 --- conda/recipes/libraft/build_libraft_distance.sh | 4 ++-- conda/recipes/libraft/build_libraft_nn.sh | 4 ++-- conda/recipes/libraft/build_libraft_tests.sh | 4 ++-- cpp/include/raft/util/cudart_utils.hpp | 2 +- cpp/include/raft/util/device_atomics.cuh | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conda/recipes/libraft/build_libraft_distance.sh b/conda/recipes/libraft/build_libraft_distance.sh index 35bf354e9b..dca32b5238 100644 --- a/conda/recipes/libraft/build_libraft_distance.sh +++ b/conda/recipes/libraft/build_libraft_distance.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft -v --allgpuarch --compile-dist --no-nvtx +PARALLEL_LEVEL=8 ./build.sh libraft -v --allgpuarch --compile-dist --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_nn.sh b/conda/recipes/libraft/build_libraft_nn.sh index 773d6ab02e..1d82e902a2 100644 --- a/conda/recipes/libraft/build_libraft_nn.sh +++ b/conda/recipes/libraft/build_libraft_nn.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft -v --allgpuarch --compile-nn --no-nvtx +PARALLEL_LEVEL=8 ./build.sh libraft -v --allgpuarch --compile-nn --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh index 040a2f8b8c..dc2fed2e6b 100644 --- a/conda/recipes/libraft/build_libraft_tests.sh +++ b/conda/recipes/libraft/build_libraft_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh tests bench -v --allgpuarch --no-nvtx +PARALLEL_LEVEL=8 ./build.sh tests bench -v --allgpuarch --no-nvtx cmake --install cpp/build --component testing diff --git a/cpp/include/raft/util/cudart_utils.hpp b/cpp/include/raft/util/cudart_utils.hpp index 5bead2c29d..1c9793eb0a 100644 --- a/cpp/include/raft/util/cudart_utils.hpp +++ b/cpp/include/raft/util/cudart_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/util/device_atomics.cuh b/cpp/include/raft/util/device_atomics.cuh index 73c02c9e39..14856bed8e 100644 --- a/cpp/include/raft/util/device_atomics.cuh +++ b/cpp/include/raft/util/device_atomics.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.