From 45583443c3f8ed8e93f382724ce1d154b55436e2 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:39:39 -0500 Subject: [PATCH] Remove deprecated nvtext::load_merge_pairs_file (#14460) Remove deprecated `nvtext::load_merge_pairs_file` since it is not needed -- in favor of `nvtext::load_merge_pairs` which accepts a strings column. Callers can use cuIO functions to load the file into a column. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/14460 --- cpp/include/nvtext/byte_pair_encoding.hpp | 37 -------------- cpp/src/text/bpe/load_merge_pairs.cu | 62 ----------------------- 2 files changed, 99 deletions(-) diff --git a/cpp/include/nvtext/byte_pair_encoding.hpp b/cpp/include/nvtext/byte_pair_encoding.hpp index 632a3cc279f..f9790a1a701 100644 --- a/cpp/include/nvtext/byte_pair_encoding.hpp +++ b/cpp/include/nvtext/byte_pair_encoding.hpp @@ -64,43 +64,6 @@ struct bpe_merge_pairs { bpe_merge_pairs(); }; -/** - * @brief Create a nvtext::bpe_merge_pairs from an input file. - * - * @deprecated Since 23.12 - * - * The file should contain a pair of strings per line separated by - * a single space. - * - * Example: - * @code{.txt} - * e n - * i t - * i s - * e s - * en t - * c e - * es t - * en ce - * T h - * Th is - * t est - * s ent - * ... - * @endcode - * - * The pairs are expected to be ordered in the file by their rank - * relative to each other. A pair earlier in the file has priority over - * any pairs below it. - * - * @param filename_merges Local file path of pairs encoded in UTF-8. - * @param mr Memory resource to allocate any returned objects. 
- * @return A nvtext::bpe_merge_pairs object - */ -[[deprecated]] std::unique_ptr load_merge_pairs_file( - std::string const& filename_merges, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Create a nvtext::bpe_merge_pairs from a strings column * diff --git a/cpp/src/text/bpe/load_merge_pairs.cu b/cpp/src/text/bpe/load_merge_pairs.cu index 80073df5804..6d223a7ddb7 100644 --- a/cpp/src/text/bpe/load_merge_pairs.cu +++ b/cpp/src/text/bpe/load_merge_pairs.cu @@ -37,53 +37,6 @@ namespace nvtext { namespace detail { namespace { -/** - * @brief Loads a text file of merge-pairs into a strings column. - * - * The line position in the file indicates the pair's rank. - * - * @code{.pseudo} - * Format of the file: - * #version .. - * a1 a2 - * b1 b2 - * c1 c2 - * ... - * @endcode - * - * @param filename_merges Path to text file containing merge-pairs - * @return object containing table elements for the BPE function - */ -std::unique_ptr load_file_to_column(std::string const& filename_merges, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - std::ifstream merges_file(filename_merges); - CUDF_EXPECTS(merges_file.good(), "Could not open " + filename_merges); - - std::vector chars{}; - std::vector offsets(1, 0); - - std::string line; - std::getline(merges_file, line); - std::string version = "#version"; - if (line.substr(0, version.size()).compare(version) == 0) { std::getline(merges_file, line); } - - // This is a text file delimited only by CR/LF. - // TODO: Look into using the CSV reader to load the strings column instead. 
- while (!line.empty()) { - chars.insert(chars.end(), std::cbegin(line), std::cend(line)); - offsets.push_back(offsets.back() + line.length()); - std::getline(merges_file, line); - } - - CUDF_EXPECTS(!chars.empty(), "No data found in " + filename_merges); - - auto d_chars = cudf::detail::make_device_uvector_async(chars, stream, mr); - auto d_offsets = cudf::detail::make_device_uvector_async(offsets, stream, mr); - return cudf::make_strings_column(d_chars, d_offsets, {}, 0); -} - std::unique_ptr initialize_merge_pairs_map( cudf::column_device_view const& input, rmm::cuda_stream_view stream) { @@ -146,14 +99,6 @@ std::unique_ptr create_bpe_merge_pairs_im } // namespace -std::unique_ptr load_merge_pairs_file(std::string const& filename_merges, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto input_column = load_file_to_column(filename_merges, stream, mr); - return std::make_unique(std::move(input_column), stream, mr); -} - std::unique_ptr load_merge_pairs(cudf::strings_column_view const& merge_pairs, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -165,13 +110,6 @@ std::unique_ptr load_merge_pairs(cudf::strings_column_view cons } // namespace detail -std::unique_ptr load_merge_pairs_file(std::string const& filename_merges, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::load_merge_pairs_file(filename_merges, cudf::get_default_stream(), mr); -} - std::unique_ptr load_merge_pairs(cudf::strings_column_view const& merge_pairs, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)