FST benchmark (#11243)

Depends on #11242 Feature/finite state transducer. Benchmark for the Finite State Transducer to parse and identify JSON symbols: - [x] FST with output, output index, output str - [x] FST without output index - [x] FST without output - [x] FST without output str Look into elstehle#1 for the files modified only in this PR (i.e., excluding the parent PR that this one depends on). Authors: - Karthikeyan (https://github.com/karthikeyann) - Elias Stehle (https://github.com/elstehle) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Elias Stehle (https://github.com/elstehle) URL: #11243
rapidsai · Jul 26, 2022 · 586da32 · 586da32
1 parent 2d214ea
commit 586da32
Show file tree

Hide file tree

Showing 4 changed files with 312 additions and 50 deletions.
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -288,6 +288,7 @@ ConfigureBench(
 # ##################################################################################################
 # * json benchmark -------------------------------------------------------------------
 ConfigureBench(JSON_BENCH string/json.cu)
+ConfigureNVBench(FST_NVBENCH io/fst.cu)
 
 # ##################################################################################################
 # * io benchmark ---------------------------------------------------------------------

diff --git a/cpp/benchmarks/io/fst.cu b/cpp/benchmarks/io/fst.cu
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+#include <nvbench/nvbench.cuh>
+
+#include <io/fst/lookup_tables.cuh>
+#include <io/utilities/hostdevice_vector.hpp>  //TODO find better replacement
+
+#include <tests/io/fst/common.hpp>
+
+#include <cudf/scalar/scalar_factories.hpp>
+#include <cudf/strings/repeat_strings.hpp>
+#include <cudf/types.hpp>
+
+#include <rmm/cuda_stream.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_buffer.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <thrust/iterator/discard_iterator.h>
+
+#include <cstdlib>
+
+namespace cudf {
+namespace {
+// Builds the benchmark input: a device string scalar made of whole copies of a fixed JSON-like
+// snippet, with the number of copies derived from the "string_size" benchmark axis.
+auto make_test_json_data(nvbench::state& state)
+{
+  // Requested input size, narrowed to cudf's size_type
+  auto const requested_size = static_cast<size_type>(state.get_int64("string_size"));
+
+  // One unit of input: two records followed by a few empty containers.
+  // NOTE(review): the "index:" keys carry the colon inside the quotes, so the snippet is not
+  // strictly valid JSON; the bytes are intentionally left exactly as they were.
+  std::string const json_unit = R"(  {)"
+                                R"("category": "reference",)"
+                                R"("index:" [4,12,42],)"
+                                R"("author": "Nigel Rees",)"
+                                R"("title": "Sayings of the Century",)"
+                                R"("price": 8.95)"
+                                R"(}  )"
+                                R"({)"
+                                R"("category": "reference",)"
+                                R"("index:" [4,{},null,{"a":[]}],)"
+                                R"("author": "Nigel Rees",)"
+                                R"("title": "Sayings of the Century",)"
+                                R"("price": 8.95)"
+                                R"(}  {} [] [ ])";
+
+  // Number of whole units that fit into the requested size
+  size_type const num_copies = requested_size / json_unit.size();
+
+  auto unit_scalar = cudf::make_string_scalar(json_unit);
+  return cudf::strings::repeat_string(static_cast<cudf::string_scalar&>(*unit_scalar),
+                                      num_copies);
+}
+
+// Bring the FST specification (symbol groups, tables, and start state) into scope
+using namespace cudf::test::io::json;
+// Atomic symbol type consumed and produced by the finite-state machine
+using SymbolT = char;
+// Type sufficiently large to index symbols within the input and output (may be unsigned)
+using SymbolOffsetT = uint32_t;
+// Helper class that sets up the transition table, symbol group lookup table, and translation table
+using DfaFstT = cudf::io::fst::detail::Dfa<SymbolT, NUM_SYMBOL_GROUPS, TT_NUM_STATES>;
+// Element count for buffers that hold exactly one value
+constexpr std::size_t single_item{1};
+
+}  // namespace
+
+// Benchmarks DfaFstT::Transduce on the generated input with device buffers supplied for the
+// output symbols, the output indexes, and the output size.
+void BM_FST_JSON(nvbench::state& state)
+{
+  // TODO: to be replaced by nvbench fixture once it's ready
+  cudf::rmm_pool_raii rmm_pool;
+
+  // Prepare cuda stream for data transfers & kernels
+  rmm::cuda_stream stream{};
+  rmm::cuda_stream_view stream_view(stream);
+
+  // The "string_size" axis is read inside make_test_json_data, so it is not read again here
+  auto input_string = make_test_json_data(state);
+  auto& d_input     = static_cast<cudf::scalar_type_t<std::string>&>(*input_string);
+
+  state.add_element_count(d_input.size());
+
+  // Prepare output buffers
+  hostdevice_vector<SymbolT> output_gpu(d_input.size(), stream_view);
+  hostdevice_vector<SymbolOffsetT> output_gpu_size(single_item, stream_view);
+  hostdevice_vector<SymbolOffsetT> out_indexes_gpu(d_input.size(), stream_view);
+
+  // Set up the transducer from the symbol groups, transition table, and translation table
+  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  // The launch handle is not needed: all work is enqueued on `stream`, registered just above
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    // Allocate device-side temporary storage & run algorithm
+    parser.Transduce(d_input.data(),
+                     static_cast<SymbolOffsetT>(d_input.size()),
+                     output_gpu.device_ptr(),
+                     out_indexes_gpu.device_ptr(),
+                     output_gpu_size.device_ptr(),
+                     start_state,
+                     stream.value());
+  });
+}
+
+// Benchmarks DfaFstT::Transduce on the generated input while discarding the output indexes;
+// the output symbols and the output size are still written to device buffers.
+void BM_FST_JSON_no_outidx(nvbench::state& state)
+{
+  // TODO: to be replaced by nvbench fixture once it's ready
+  cudf::rmm_pool_raii rmm_pool;
+
+  // Prepare cuda stream for data transfers & kernels
+  rmm::cuda_stream stream{};
+  rmm::cuda_stream_view stream_view(stream);
+
+  // The "string_size" axis is read inside make_test_json_data, so it is not read again here
+  auto input_string = make_test_json_data(state);
+  auto& d_input     = static_cast<cudf::scalar_type_t<std::string>&>(*input_string);
+
+  state.add_element_count(d_input.size());
+
+  // Prepare output buffers. No index buffer is allocated because the indexes go to a discard
+  // iterator below.
+  hostdevice_vector<SymbolT> output_gpu(d_input.size(), stream_view);
+  hostdevice_vector<SymbolOffsetT> output_gpu_size(single_item, stream_view);
+
+  // Set up the transducer from the symbol groups, transition table, and translation table
+  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  // The launch handle is not needed: all work is enqueued on `stream`, registered just above
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    // Allocate device-side temporary storage & run algorithm
+    parser.Transduce(d_input.data(),
+                     static_cast<SymbolOffsetT>(d_input.size()),
+                     output_gpu.device_ptr(),
+                     thrust::make_discard_iterator(),
+                     output_gpu_size.device_ptr(),
+                     start_state,
+                     stream.value());
+  });
+}
+
+// Benchmarks DfaFstT::Transduce on the generated input while discarding both the output symbols
+// and the output indexes; only the output size is written to a device buffer.
+void BM_FST_JSON_no_out(nvbench::state& state)
+{
+  // TODO: to be replaced by nvbench fixture once it's ready
+  cudf::rmm_pool_raii rmm_pool;
+
+  // Prepare cuda stream for data transfers & kernels
+  rmm::cuda_stream stream{};
+  rmm::cuda_stream_view stream_view(stream);
+
+  // The "string_size" axis is read inside make_test_json_data, so it is not read again here
+  auto input_string = make_test_json_data(state);
+  auto& d_input     = static_cast<cudf::scalar_type_t<std::string>&>(*input_string);
+
+  state.add_element_count(d_input.size());
+
+  // Prepare output buffers
+  hostdevice_vector<SymbolOffsetT> output_gpu_size(single_item, stream_view);
+
+  // Set up the transducer from the symbol groups, transition table, and translation table
+  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  // The launch handle is not needed: all work is enqueued on `stream`, registered just above
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    // Allocate device-side temporary storage & run algorithm
+    parser.Transduce(d_input.data(),
+                     static_cast<SymbolOffsetT>(d_input.size()),
+                     thrust::make_discard_iterator(),
+                     thrust::make_discard_iterator(),
+                     output_gpu_size.device_ptr(),
+                     start_state,
+                     stream.value());
+  });
+}
+
+// Benchmarks DfaFstT::Transduce on the generated input while discarding the output symbols;
+// the output indexes and the output size are still written to device buffers.
+void BM_FST_JSON_no_str(nvbench::state& state)
+{
+  // TODO: to be replaced by nvbench fixture once it's ready
+  cudf::rmm_pool_raii rmm_pool;
+
+  // Prepare cuda stream for data transfers & kernels
+  rmm::cuda_stream stream{};
+  rmm::cuda_stream_view stream_view(stream);
+
+  // The "string_size" axis is read inside make_test_json_data, so it is not read again here
+  auto input_string = make_test_json_data(state);
+  auto& d_input     = static_cast<cudf::scalar_type_t<std::string>&>(*input_string);
+
+  state.add_element_count(d_input.size());
+
+  // Prepare output buffers
+  hostdevice_vector<SymbolOffsetT> output_gpu_size(single_item, stream_view);
+  hostdevice_vector<SymbolOffsetT> out_indexes_gpu(d_input.size(), stream_view);
+
+  // Set up the transducer from the symbol groups, transition table, and translation table
+  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  // The launch handle is not needed: all work is enqueued on `stream`, registered just above
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    // Allocate device-side temporary storage & run algorithm
+    parser.Transduce(d_input.data(),
+                     static_cast<SymbolOffsetT>(d_input.size()),
+                     thrust::make_discard_iterator(),
+                     out_indexes_gpu.device_ptr(),
+                     output_gpu_size.device_ptr(),
+                     start_state,
+                     stream.value());
+  });
+}
+
+// Benchmark registrations. The "string_size" axis sweeps powers of two. The benchmarks narrow
+// this value to cudf::size_type (a signed 32-bit integer), so the largest usable exponent is 30:
+// 2^31 is not representable and would corrupt the repeat count used to build the input.
+NVBENCH_BENCH(BM_FST_JSON)
+  .set_name("FST_JSON")
+  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1));
+
+NVBENCH_BENCH(BM_FST_JSON_no_outidx)
+  .set_name("FST_JSON_no_outidx")
+  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1));
+
+NVBENCH_BENCH(BM_FST_JSON_no_out)
+  .set_name("FST_JSON_no_out")
+  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1));
+
+NVBENCH_BENCH(BM_FST_JSON_no_str)
+  .set_name("FST_JSON_no_str")
+  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1));
+
+}  // namespace cudf
diff --git a/cpp/tests/io/fst/common.hpp b/cpp/tests/io/fst/common.hpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace cudf::test::io::json {
+//------------------------------------------------------------------------------
+// TEST FST SPECIFICATIONS
+//------------------------------------------------------------------------------
+// FST to check for brackets and braces outside of pairs of quotes.
+// DFA_STATES lists the states of that FST; the enumerators are declared in the same order as the
+// rows of pda_state_tt and pda_out_tt.
+enum DFA_STATES : char {
+  // The state being active while being outside of a string. When encountering an opening bracket or
+  // curly brace, we push it onto the stack. When encountering a closing bracket or brace, we pop it
+  // from the stack.
+  // NOTE(review): no stack is visible in this header; in pda_out_tt this state simply emits
+  // brackets and braces unchanged. Per pda_state_tt, a quote moves to TT_STR and every other
+  // symbol group stays in TT_OOS.
+  TT_OOS = 0U,
+  // The state being active while being within a string (e.g., field name or a string value). We do
+  // not push or pop from the stack while being in this state.
+  // Per pda_state_tt, a quote returns to TT_OOS and an escape character moves to TT_ESC.
+  TT_STR,
+  // The state being active after encountering an escape symbol (e.g., '\') while being in the
+  // TT_STR state. Per pda_state_tt, every symbol group returns to TT_STR from here.
+  TT_ESC,
+  // Total number of states
+  TT_NUM_STATES
+};
+
+// Definition of the symbol groups (SG).
+// The enumerators are declared in the same order as the columns of pda_state_tt and pda_out_tt
+// and as the entries of pda_sgs.
+enum PDA_SG_ID {
+  OBC = 0U,          ///< Opening brace SG: {
+  OBT,               ///< Opening bracket SG: [
+  CBC,               ///< Closing brace SG: }
+  CBT,               ///< Closing bracket SG: ]
+  QTE,               ///< Quote character SG: "
+  ESC,               ///< Escape character SG: '\'
+  OTR,               ///< SG implicitly matching all other characters
+  NUM_SYMBOL_GROUPS  ///< Total number of symbol groups
+};
+
+// Transition table: one row per current state (DFA_STATES order) and one column per symbol group
+// (PDA_SG_ID order); each entry is the state entered after reading a symbol of that group.
+// Only a quote leaves TT_OOS, and TT_ESC always returns to TT_STR.
+const std::vector<std::vector<DFA_STATES>> pda_state_tt = {
+  /* IN_STATE         {       [       }       ]       "       \    OTHER */
+  /* TT_OOS    */ {TT_OOS, TT_OOS, TT_OOS, TT_OOS, TT_STR, TT_OOS, TT_OOS},
+  /* TT_STR    */ {TT_STR, TT_STR, TT_STR, TT_STR, TT_OOS, TT_ESC, TT_STR},
+  /* TT_ESC    */ {TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR}};
+
+// Translation table (i.e., for each transition, what are the symbols that we output).
+// Laid out like pda_state_tt: rows are the current state, columns are the symbol group.
+// Brackets and braces read in TT_OOS are emitted unchanged; every other transition emits 'x'.
+const std::vector<std::vector<std::vector<char>>> pda_out_tt = {
+  /* IN_STATE        {      [      }      ]     "  \   OTHER */
+  /* TT_OOS    */ {{'{'}, {'['}, {'}'}, {']'}, {'x'}, {'x'}, {'x'}},
+  /* TT_STR    */ {{'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}},
+  /* TT_ESC    */ {{'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}}};
+
+// The i-th string representing all the characters of a symbol group.
+// Entries follow PDA_SG_ID order from OBC through ESC; OTR has no entry because it implicitly
+// matches all other characters.
+const std::vector<std::string> pda_sgs = {"{", "[", "}", "]", "\"", "\\"};
+
+// The DFA's starting state: parsing begins outside of any string (TT_OOS)
+constexpr DFA_STATES start_state = TT_OOS;
+
+}  // namespace cudf::test::io::json
diff --git a/cpp/tests/io/fst/fst_test.cu b/cpp/tests/io/fst/fst_test.cu
@@ -16,6 +16,7 @@
 
 #include <io/fst/lookup_tables.cuh>
 #include <io/utilities/hostdevice_vector.hpp>
+#include <tests/io/fst/common.hpp>
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/cudf_gtest.hpp>
@@ -116,56 +117,7 @@ static std::pair<OutputItT, IndexOutputItT> fst_baseline(InputItT begin,
   return {out_tape, out_index_tape};
 }
 
-//------------------------------------------------------------------------------
-// TEST FST SPECIFICATIONS
-//------------------------------------------------------------------------------
-enum DFA_STATES : char {
-  // The state being active while being outside of a string. When encountering an opening bracket or
-  // curly brace, we push it onto the stack. When encountering a closing bracket or brace, we pop it
-  // from the stack.
-  TT_OOS = 0U,
-  // The state being active while being within a string (e.g., field name or a string value). We do
-  // not push or pop from the stack while being in this state.
-  TT_STR,
-  // The state being active after encountering an escape symbol (e.g., '\') while being in the
-  // TT_STR state.
-  TT_ESC,
-  // Total number of states
-  TT_NUM_STATES
-};
-
-// Definition of the symbol groups
-enum PDA_SG_ID {
-  OBC = 0U,          ///< Opening brace SG: {
-  OBT,               ///< Opening bracket SG: [
-  CBC,               ///< Closing brace SG: }
-  CBT,               ///< Closing bracket SG: ]
-  QTE,               ///< Quote character SG: "
-  ESC,               ///< Escape character SG: '\'
-  OTR,               ///< SG implicitly matching all other characters
-  NUM_SYMBOL_GROUPS  ///< Total number of symbol groups
-};
-
-// Transition table
-const std::vector<std::vector<char>> pda_state_tt = {
-  /* IN_STATE         {       [       }       ]       "       \    OTHER */
-  /* TT_OOS    */ {TT_OOS, TT_OOS, TT_OOS, TT_OOS, TT_STR, TT_OOS, TT_OOS},
-  /* TT_STR    */ {TT_STR, TT_STR, TT_STR, TT_STR, TT_OOS, TT_ESC, TT_STR},
-  /* TT_ESC    */ {TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR}};
-
-// Translation table (i.e., for each transition, what are the symbols that we output)
-const std::vector<std::vector<std::vector<char>>> pda_out_tt = {
-  /* IN_STATE        {      [      }      ]     "  \   OTHER */
-  /* TT_OOS    */ {{'{'}, {'['}, {'}'}, {']'}, {'x'}, {'x'}, {'x'}},
-  /* TT_STR    */ {{'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}},
-  /* TT_ESC    */ {{'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}, {'x'}}};
-
-// The i-th string representing all the characters of a symbol group
-const std::vector<std::string> pda_sgs = {"{", "[", "}", "]", "\"", "\\"};
-
-// The DFA's starting state
-constexpr int32_t start_state = TT_OOS;
-
+using namespace cudf::test::io::json;
 }  // namespace
 
 // Base test fixture for tests