Skip to content

Commit

Permalink
Remove deprecated nvtext::load_merge_pairs_file (#14460)
Browse files Browse the repository at this point in the history
Remove the deprecated `nvtext::load_merge_pairs_file`, which is no longer needed; use `nvtext::load_merge_pairs`, which accepts a strings column, instead. Callers can use cuIO functions to load the file into a column.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #14460
  • Loading branch information
davidwendt authored Nov 29, 2023
1 parent 75d5978 commit 4558344
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 99 deletions.
37 changes: 0 additions & 37 deletions cpp/include/nvtext/byte_pair_encoding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,43 +64,6 @@ struct bpe_merge_pairs {
bpe_merge_pairs();
};

/**
 * @brief Create a nvtext::bpe_merge_pairs from an input file.
 *
 * @deprecated Since 23.12. Use nvtext::load_merge_pairs, which accepts a
 * strings column, instead.
 *
 * The file should contain a pair of strings per line separated by
 * a single space.
 *
 * Example:
 * @code{.txt}
 * e n
 * i t
 * i s
 * e s
 * en t
 * c e
 * es t
 * en ce
 * T h
 * Th is
 * t est
 * s ent
 * ...
 * @endcode
 *
 * The pairs are expected to be ordered in the file by their rank
 * relative to each other. A pair earlier in the file has priority over
 * any pairs below it.
 *
 * If the first line of the file starts with `#version` it is skipped.
 * Reading stops at the first empty line or at the end of the file.
 *
 * @param filename_merges Local file path of pairs encoded in UTF-8.
 * @param mr Memory resource to allocate any returned objects.
 * @return A nvtext::bpe_merge_pairs object
 */
[[deprecated("Use nvtext::load_merge_pairs with a strings column instead")]]
std::unique_ptr<bpe_merge_pairs> load_merge_pairs_file(
  std::string const& filename_merges,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create a nvtext::bpe_merge_pairs from a strings column
*
Expand Down
62 changes: 0 additions & 62 deletions cpp/src/text/bpe/load_merge_pairs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,53 +37,6 @@ namespace nvtext {
namespace detail {
namespace {

/**
* @brief Loads a text file of merge-pairs into a strings column.
*
* The line position in the file indicates the pair's rank.
*
* @code{.pseudo}
* Format of the file:
* #version ..
* a1 a2
* b1 b2
* c1 c2
* ...
* @endcode
*
* @param filename_merges Path to text file containing merge-pairs
* @return object containing table elements for the BPE function
*/
std::unique_ptr<cudf::column> load_file_to_column(std::string const& filename_merges,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
std::ifstream merges_file(filename_merges);
CUDF_EXPECTS(merges_file.good(), "Could not open " + filename_merges);

std::vector<char> chars{};
std::vector<cudf::size_type> offsets(1, 0);

std::string line;
std::getline(merges_file, line);
std::string version = "#version";
if (line.substr(0, version.size()).compare(version) == 0) { std::getline(merges_file, line); }

// This is a text file delimited only by CR/LF.
// TODO: Look into using the CSV reader to load the strings column instead.
while (!line.empty()) {
chars.insert(chars.end(), std::cbegin(line), std::cend(line));
offsets.push_back(offsets.back() + line.length());
std::getline(merges_file, line);
}

CUDF_EXPECTS(!chars.empty(), "No data found in " + filename_merges);

auto d_chars = cudf::detail::make_device_uvector_async(chars, stream, mr);
auto d_offsets = cudf::detail::make_device_uvector_async(offsets, stream, mr);
return cudf::make_strings_column(d_chars, d_offsets, {}, 0);
}

std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
cudf::column_device_view const& input, rmm::cuda_stream_view stream)
{
Expand Down Expand Up @@ -146,14 +99,6 @@ std::unique_ptr<bpe_merge_pairs::bpe_merge_pairs_impl> create_bpe_merge_pairs_im

} // namespace

/**
 * @brief Builds a bpe_merge_pairs object from a merge-pairs text file.
 *
 * The file is read into a strings column by load_file_to_column and that
 * column is handed directly to the bpe_merge_pairs constructor.
 *
 * @param filename_merges Path to text file containing merge-pairs
 * @param stream CUDA stream forwarded to the loader and the constructor
 * @param mr Memory resource forwarded to the loader and the constructor
 * @return The newly created bpe_merge_pairs object
 */
std::unique_ptr<bpe_merge_pairs> load_merge_pairs_file(std::string const& filename_merges,
                                                       rmm::cuda_stream_view stream,
                                                       rmm::mr::device_memory_resource* mr)
{
  return std::make_unique<bpe_merge_pairs>(
    load_file_to_column(filename_merges, stream, mr), stream, mr);
}

std::unique_ptr<bpe_merge_pairs> load_merge_pairs(cudf::strings_column_view const& merge_pairs,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand All @@ -165,13 +110,6 @@ std::unique_ptr<bpe_merge_pairs> load_merge_pairs(cudf::strings_column_view cons

} // namespace detail

/**
 * @brief Public entry point: creates a bpe_merge_pairs object from a file.
 *
 * Forwards to detail::load_merge_pairs_file using the stream returned by
 * cudf::get_default_stream().
 *
 * @param filename_merges Path to text file containing merge-pairs
 * @param mr Memory resource forwarded to the detail implementation
 * @return The bpe_merge_pairs object produced by the detail implementation
 */
std::unique_ptr<bpe_merge_pairs> load_merge_pairs_file(std::string const& filename_merges,
                                                       rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  auto const default_stream = cudf::get_default_stream();
  return detail::load_merge_pairs_file(filename_merges, default_stream, mr);
}

std::unique_ptr<bpe_merge_pairs> load_merge_pairs(cudf::strings_column_view const& merge_pairs,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down

0 comments on commit 4558344

Please sign in to comment.