From 1bb3aac2a918e95483fd52b1523855014bc03a34 Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Thu, 12 May 2022 15:43:07 -0500 Subject: [PATCH 1/2] Adds the JNI call for Cuda.deviceSynchronize (#10839) Running tests locally, but putting this up as WIP for now. Discussing with @jlowe a solution to https://github.com/NVIDIA/spark-rapids/issues/4818 could involve `cudaDeviceSynchronize.` I noticed that's not in our JNI exposed calls, so I am adding it here. Authors: - Alessandro Bellina (https://github.com/abellina) Approvers: - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/10839 --- java/src/main/java/ai/rapids/cudf/Cuda.java | 6 ++++++ java/src/main/native/src/CudaJni.cpp | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/Cuda.java b/java/src/main/java/ai/rapids/cudf/Cuda.java index 21843527fc2..56a754279fc 100755 --- a/java/src/main/java/ai/rapids/cudf/Cuda.java +++ b/java/src/main/java/ai/rapids/cudf/Cuda.java @@ -596,4 +596,10 @@ public static void multiBufferCopyAsync(long [] destAddrs, * no effect. */ public static native void profilerStop(); + + /** + * Synchronizes the whole device using cudaDeviceSynchronize. + * @note this is very expensive and should almost never be used + */ + public static native void deviceSynchronize(); } diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp index 926521c55f9..ce1ad1b1671 100644 --- a/java/src/main/native/src/CudaJni.cpp +++ b/java/src/main/native/src/CudaJni.cpp @@ -390,4 +390,12 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_profilerStop(JNIEnv *env, jclass CATCH_STD(env, ); } +JNIEXPORT void JNICALL Java_ai_rapids_cudf_Cuda_deviceSynchronize(JNIEnv *env, jclass clazz) { + try { + cudf::jni::auto_set_device(env); + CUDF_CUDA_TRY(cudaDeviceSynchronize()); + } + CATCH_STD(env, ); +} + } // extern "C" From fe9aaebeedce9a870661db2fe547addc8f3388fe Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 12 May 2022 17:15:32 -0400 Subject: [PATCH 2/2] Cleanup regex compile optimize functions (#10825) Cleans up the internal `regcomp::optimize1()` function by replacing for-loops with STL functions. Hopefully this will make this part of the code a bit easier to understand and maintain. No external function or behavior has changed. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/10825 --- cpp/src/strings/regex/regcomp.cpp | 124 +++++++++++++++--------------- cpp/src/strings/regex/regcomp.h | 5 +- 2 files changed, 64 insertions(+), 65 deletions(-) diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index f99acc3448a..dd4b4116994 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -21,7 +21,9 @@ #include #include -#include +#include +#include +#include namespace cudf { namespace strings { @@ -862,8 +864,7 @@ class regex_compiler { ; // "unmatched left paren"; /* points to first and only operand */ m_prog.set_start_inst(andstack[andstack.size() - 1].id_first); - m_prog.optimize1(); - m_prog.optimize2(); + m_prog.finalize(); m_prog.check_for_errors(); m_prog.set_groups_count(cursubid); } @@ -880,81 +881,78 @@ reprog reprog::create_from(std::string_view pattern, regex_flags const flags) return rtn; } -// -void reprog::optimize1() +void reprog::finalize() { - // Treat non-capturing LBRAs/RBRAs as NOOP - for (int i = 0; i < static_cast(_insts.size()); i++) { - if (_insts[i].type == LBRA || _insts[i].type == RBRA) { - if (_insts[i].u1.subid < 1) { _insts[i].type = NOP; } + collapse_nops(); + build_start_ids(); +} + +void reprog::collapse_nops() +{ + // treat non-capturing LBRAs/RBRAs as NOP + std::transform(_insts.begin(), _insts.end(), _insts.begin(), [](auto inst) { + if ((inst.type == LBRA || inst.type == RBRA) && (inst.u1.subid < 1)) { inst.type = NOP; } + return inst; + }); + + // functor for finding the next valid op + auto find_next_op = [insts = _insts](int id) { + while (insts[id].type == NOP) { + id = insts[id].u2.next_id; } - } + return id; + }; - // get rid of NOP chains - for (int i = 0; i < insts_count(); i++) { - if (_insts[i].type != NOP) { - { - int target_id = _insts[i].u2.next_id; - while (_insts[target_id].type == NOP) - target_id = _insts[target_id].u2.next_id; - _insts[i].u2.next_id = target_id; - } - if (_insts[i].type == OR) { - int target_id = _insts[i].u1.right_id; - while (_insts[target_id].type == NOP) - target_id = _insts[target_id].u2.next_id; - _insts[i].u1.right_id = target_id; - } + // create new routes around NOP chains + std::transform(_insts.begin(), _insts.end(), _insts.begin(), [find_next_op](auto inst) { + if (inst.type != NOP) { + inst.u2.next_id = find_next_op(inst.u2.next_id); + if (inst.type == OR) { inst.u1.right_id = find_next_op(inst.u1.right_id); } } - } - // skip NOPs from the beginning - { - int target_id = _startinst_id; - while (_insts[target_id].type == NOP) - target_id = _insts[target_id].u2.next_id; - _startinst_id = target_id; - } - // actually remove the no-ops + return inst; + }); + + // find starting op + _startinst_id = find_next_op(_startinst_id); + + // build a map of op ids + // these are used to fix up the ids after the NOPs are removed std::vector id_map(insts_count()); - int j = 0; // compact the ops (non no-ops) - for (int i = 0; i < insts_count(); i++) { - id_map[i] = j; - if (_insts[i].type != NOP) { - _insts[j] = _insts[i]; - j++; - } - } - _insts.resize(j); - // fix up the ORs - for (int i = 0; i < insts_count(); i++) { - { - int target_id = _insts[i].u2.next_id; - _insts[i].u2.next_id = id_map[target_id]; - } - if (_insts[i].type == OR) { - int target_id = _insts[i].u1.right_id; - _insts[i].u1.right_id = id_map[target_id]; - } - } - // set the new start id + std::transform_exclusive_scan( + _insts.begin(), _insts.end(), id_map.begin(), 0, std::plus{}, [](auto inst) { + return static_cast(inst.type != NOP); + }); + + // remove the NOP instructions + auto end = std::remove_if(_insts.begin(), _insts.end(), [](auto i) { return i.type == NOP; }); + _insts.resize(std::distance(_insts.begin(), end)); + + // fix up the ids on the remaining instructions using the id_map + std::transform(_insts.begin(), _insts.end(), _insts.begin(), [id_map](auto inst) { + inst.u2.next_id = id_map[inst.u2.next_id]; + if (inst.type == OR) { inst.u1.right_id = id_map[inst.u1.right_id]; } + return inst; + }); + + // fix up the start instruction id too _startinst_id = id_map[_startinst_id]; } // expand leading ORs to multiple startinst_ids -void reprog::optimize2() +void reprog::build_start_ids() { _startinst_ids.clear(); - std::vector stack; - stack.push_back(_startinst_id); - while (!stack.empty()) { - int id = stack.back(); - stack.pop_back(); + std::stack ids; + ids.push(_startinst_id); + while (!ids.empty()) { + int id = ids.top(); + ids.pop(); const reinst& inst = _insts[id]; if (inst.type == OR) { if (inst.u2.left_id != id) // prevents infinite while-loop here - stack.push_back(inst.u2.left_id); + ids.push(inst.u2.left_id); if (inst.u1.right_id != id) // prevents infinite while-loop here - stack.push_back(inst.u1.right_id); + ids.push(inst.u1.right_id); } else { _startinst_ids.push_back(id); } diff --git a/cpp/src/strings/regex/regcomp.h b/cpp/src/strings/regex/regcomp.h index 162a2090268..ed87660f106 100644 --- a/cpp/src/strings/regex/regcomp.h +++ b/cpp/src/strings/regex/regcomp.h @@ -124,8 +124,7 @@ class reprog { void set_start_inst(int32_t id); [[nodiscard]] int32_t get_start_inst() const; - void optimize1(); - void optimize2(); + void finalize(); void check_for_errors(); #ifndef NDEBUG void print(regex_flags const flags); @@ -139,6 +138,8 @@ class reprog { int32_t _num_capturing_groups{}; reprog() = default; + void collapse_nops(); + void build_start_ids(); void check_for_errors(int32_t id, int32_t next_id); };