Adding lap.hpp back (with deprecation) (#529)

Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: #529
rapidsai · Feb 25, 2022 · 97c3b2b · 97c3b2b
1 parent 08003c2
commit 97c3b2b
Show file tree

Hide file tree

Showing 2 changed files with 303 additions and 0 deletions.
diff --git a/cpp/include/raft/lap/lap.cuh b/cpp/include/raft/lap/lap.cuh
@@ -22,6 +22,10 @@
  *          for the Linear Assignment Problem." Parallel Computing 57 (2016): 52-72.
  *
  */
+
+#ifndef __LAP_H
+#define __LAP_H
+
 #pragma once
 
 #include <raft/handle.hpp>
@@ -284,3 +288,5 @@ class LinearAssignmentProblem {
 
 }  // namespace lap
 }  // namespace raft
+
+#endif
diff --git a/cpp/include/raft/lap/lap.hpp b/cpp/include/raft/lap/lap.hpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright 2020 KETAN DATE & RAKESH NAGI
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *      CUDA Implementation of O(n^3) alternating tree Hungarian Algorithm
+ *      Authors: Ketan Date and Rakesh Nagi
+ *
+ *      Article reference:
+ *          Date, Ketan, and Rakesh Nagi. "GPU-accelerated Hungarian algorithms
+ *          for the Linear Assignment Problem." Parallel Computing 57 (2016): 52-72.
+ *
+ */
+
+/**
+ * @warning This file is deprecated and will be removed in release 22.06.
+ * Please use the cuh version instead.
+ */
+
+#ifndef __LAP_H
+#define __LAP_H
+
+#pragma once
+
+#include <raft/handle.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include "detail/d_structs.h"
+#include "detail/lap_functions.cuh"
+
+namespace raft {
+namespace lap {
+
+template <typename vertex_t, typename weight_t>
+class LinearAssignmentProblem {
+  vertex_t size_;
+  vertex_t batchsize_;
+  weight_t epsilon_;
+
+  weight_t const* d_costs_;
+
+  Vertices<vertex_t, weight_t> d_vertices_dev;
+  VertexData<vertex_t> d_row_data_dev, d_col_data_dev;
+
+  raft::handle_t const& handle_;
+  rmm::device_uvector<int> row_covers_v;
+  rmm::device_uvector<int> col_covers_v;
+  rmm::device_uvector<weight_t> row_duals_v;
+  rmm::device_uvector<weight_t> col_duals_v;
+  rmm::device_uvector<weight_t> col_slacks_v;
+  rmm::device_uvector<int> row_is_visited_v;
+  rmm::device_uvector<int> col_is_visited_v;
+  rmm::device_uvector<vertex_t> row_parents_v;
+  rmm::device_uvector<vertex_t> col_parents_v;
+  rmm::device_uvector<vertex_t> row_children_v;
+  rmm::device_uvector<vertex_t> col_children_v;
+  rmm::device_uvector<weight_t> obj_val_primal_v;
+  rmm::device_uvector<weight_t> obj_val_dual_v;
+
+ public:
+  LinearAssignmentProblem(raft::handle_t const& handle,
+                          vertex_t size,
+                          vertex_t batchsize,
+                          weight_t epsilon)
+    : handle_(handle),
+      size_(size),
+      batchsize_(batchsize),
+      epsilon_(epsilon),
+      d_costs_(nullptr),
+      row_covers_v(0, handle_.get_stream()),
+      col_covers_v(0, handle_.get_stream()),
+      row_duals_v(0, handle_.get_stream()),
+      col_duals_v(0, handle_.get_stream()),
+      col_slacks_v(0, handle_.get_stream()),
+      row_is_visited_v(0, handle_.get_stream()),
+      col_is_visited_v(0, handle_.get_stream()),
+      row_parents_v(0, handle_.get_stream()),
+      col_parents_v(0, handle_.get_stream()),
+      row_children_v(0, handle_.get_stream()),
+      col_children_v(0, handle_.get_stream()),
+      obj_val_primal_v(0, handle_.get_stream()),
+      obj_val_dual_v(0, handle_.get_stream())
+  {
+  }
+
+  // Executes Hungarian algorithm on the input cost matrix.
+  void solve(weight_t const* d_cost_matrix, vertex_t* d_row_assignment, vertex_t* d_col_assignment)
+  {
+    initializeDevice();
+
+    d_vertices_dev.row_assignments = d_row_assignment;
+    d_vertices_dev.col_assignments = d_col_assignment;
+
+    d_costs_ = d_cost_matrix;
+
+    int step = 0;
+
+    while (step != 100) {
+      switch (step) {
+        case 0: step = hungarianStep0(); break;
+        case 1: step = hungarianStep1(); break;
+        case 2: step = hungarianStep2(); break;
+        case 3: step = hungarianStep3(); break;
+        case 4: step = hungarianStep4(); break;
+        case 5: step = hungarianStep5(); break;
+        case 6: step = hungarianStep6(); break;
+      }
+    }
+
+    d_costs_ = nullptr;
+  }
+
+  // Function for getting optimal row dual vector for subproblem spId.
+  std::pair<const weight_t*, vertex_t> getRowDualVector(int spId) const
+  {
+    return std::make_pair(row_duals_v.data() + spId * size_, size_);
+  }
+
+  // Function for getting optimal col dual vector for subproblem spId.
+  std::pair<const weight_t*, vertex_t> getColDualVector(int spId)
+  {
+    return std::make_pair(col_duals_v.data() + spId * size_, size_);
+  }
+
+  // Function for getting optimal primal objective value for subproblem spId.
+  weight_t getPrimalObjectiveValue(int spId)
+  {
+    weight_t result;
+    raft::update_host(&result, obj_val_primal_v.data() + spId, 1, handle_.get_stream());
+    CHECK_CUDA(handle_.get_stream());
+    return result;
+  }
+
+  // Function for getting optimal dual objective value for subproblem spId.
+  weight_t getDualObjectiveValue(int spId)
+  {
+    weight_t result;
+    raft::update_host(&result, obj_val_dual_v.data() + spId, 1, handle_.get_stream());
+    CHECK_CUDA(handle_.get_stream());
+    return result;
+  }
+
+ private:
+  // Helper function for initializing global variables and arrays on a single host.
+  void initializeDevice()
+  {
+    cudaStream_t stream = handle_.get_stream();
+    row_covers_v.resize(batchsize_ * size_, stream);
+    col_covers_v.resize(batchsize_ * size_, stream);
+    row_duals_v.resize(batchsize_ * size_, stream);
+    col_duals_v.resize(batchsize_ * size_, stream);
+    col_slacks_v.resize(batchsize_ * size_, stream);
+    row_is_visited_v.resize(batchsize_ * size_, stream);
+    col_is_visited_v.resize(batchsize_ * size_, stream);
+    row_parents_v.resize(batchsize_ * size_, stream);
+    col_parents_v.resize(batchsize_ * size_, stream);
+    row_children_v.resize(batchsize_ * size_, stream);
+    col_children_v.resize(batchsize_ * size_, stream);
+    obj_val_primal_v.resize(batchsize_, stream);
+    obj_val_dual_v.resize(batchsize_, stream);
+
+    d_vertices_dev.row_covers = row_covers_v.data();
+    d_vertices_dev.col_covers = col_covers_v.data();
+
+    d_vertices_dev.row_duals  = row_duals_v.data();
+    d_vertices_dev.col_duals  = col_duals_v.data();
+    d_vertices_dev.col_slacks = col_slacks_v.data();
+
+    d_row_data_dev.is_visited = row_is_visited_v.data();
+    d_col_data_dev.is_visited = col_is_visited_v.data();
+    d_row_data_dev.parents    = row_parents_v.data();
+    d_row_data_dev.children   = row_children_v.data();
+    d_col_data_dev.parents    = col_parents_v.data();
+    d_col_data_dev.children   = col_children_v.data();
+
+    thrust::fill(thrust::device, row_covers_v.begin(), row_covers_v.end(), int{0});
+    thrust::fill(thrust::device, col_covers_v.begin(), col_covers_v.end(), int{0});
+    thrust::fill(thrust::device, row_duals_v.begin(), row_duals_v.end(), weight_t{0});
+    thrust::fill(thrust::device, col_duals_v.begin(), col_duals_v.end(), weight_t{0});
+  }
+
+  // Function for calculating initial zeros by subtracting row and column minima from each element.
+  int hungarianStep0()
+  {
+    detail::initialReduction(handle_, d_costs_, d_vertices_dev, batchsize_, size_);
+
+    return 1;
+  }
+
+  // Function for calculating initial zeros by subtracting row and column minima from each element.
+  int hungarianStep1()
+  {
+    detail::computeInitialAssignments(
+      handle_, d_costs_, d_vertices_dev, batchsize_, size_, epsilon_);
+
+    int next = 2;
+
+    while (true) {
+      if ((next = hungarianStep2()) == 6) break;
+
+      if ((next = hungarianStep3()) == 5) break;
+
+      hungarianStep4();
+    }
+
+    return next;
+  }
+
+  // Function for checking optimality and constructing predicates and covers.
+  int hungarianStep2()
+  {
+    int cover_count = detail::computeRowCovers(
+      handle_, d_vertices_dev, d_row_data_dev, d_col_data_dev, batchsize_, size_);
+
+    int next = (cover_count == batchsize_ * size_) ? 6 : 3;
+
+    return next;
+  }
+
+  // Function for building alternating tree rooted at unassigned rows.
+  int hungarianStep3()
+  {
+    int next;
+
+    rmm::device_scalar<bool> flag_v(handle_.get_stream());
+
+    bool h_flag = false;
+    flag_v.set_value_async(h_flag, handle_.get_stream());
+
+    detail::executeZeroCover(handle_,
+                             d_costs_,
+                             d_vertices_dev,
+                             d_row_data_dev,
+                             d_col_data_dev,
+                             flag_v.data(),
+                             batchsize_,
+                             size_,
+                             epsilon_);
+
+    h_flag = flag_v.value(handle_.get_stream());
+
+    next = h_flag ? 4 : 5;
+
+    return next;
+  }
+
+  // Function for augmenting the solution along multiple node-disjoint alternating trees.
+  int hungarianStep4()
+  {
+    detail::reversePass(handle_, d_row_data_dev, d_col_data_dev, batchsize_, size_);
+
+    detail::augmentationPass(
+      handle_, d_vertices_dev, d_row_data_dev, d_col_data_dev, batchsize_, size_);
+
+    return 2;
+  }
+
+  // Function for updating dual solution to introduce new zero-cost arcs.
+  int hungarianStep5()
+  {
+    detail::dualUpdate(
+      handle_, d_vertices_dev, d_row_data_dev, d_col_data_dev, batchsize_, size_, epsilon_);
+
+    return 3;
+  }
+
+  // Function for calculating primal and dual objective values at optimality.
+  int hungarianStep6()
+  {
+    detail::calcObjValPrimal(handle_,
+                             obj_val_primal_v.data(),
+                             d_costs_,
+                             d_vertices_dev.row_assignments,
+                             batchsize_,
+                             size_);
+
+    detail::calcObjValDual(handle_, obj_val_dual_v.data(), d_vertices_dev, batchsize_, size_);
+
+    return 100;
+  }
+};
+
+}  // namespace lap
+}  // namespace raft
+
+#endif