diff --git a/CHANGELOG.md b/CHANGELOG.md index afe42a6a3b7..1be12336793 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - PR #782 Use Cython's `new_build_ext` (if available) - PR #788 Added options and config file to enable codecov - PR #793 Fix legacy cudf imports/cimports +- PR #802 Removed use of gdf_column from db code - PR #798 Edit return graph type in algorithms return graphs - PR #803 Enable Ninja build - PR #804 Cythonize in parallel diff --git a/cpp/src/db/db_object.cu b/cpp/src/db/db_object.cu index 9d4555b9dc1..aad9cfbe326 100644 --- a/cpp/src/db/db_object.cu +++ b/cpp/src/db/db_object.cu @@ -19,510 +19,494 @@ #include #include #include +#include -namespace cugraph { +namespace cugraph { namespace db { - // Define kernel for copying run length encoded values into offset slots. - template - __global__ void offsetsKernel(T runCounts, T* unique, T* counts, T* offsets) { - uint64_t tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid < runCounts) - offsets[unique[tid]] = counts[tid]; - } - - template - db_pattern_entry::db_pattern_entry(std::string variable) { - is_var = true; - variableName = variable; - } - - template - db_pattern_entry::db_pattern_entry(idx_t constant) { - is_var = false; - constantValue = constant; - } - - template - db_pattern_entry::db_pattern_entry(const db_pattern_entry& other) { - is_var = other.is_var; - constantValue = other.constantValue; - variableName = other.variableName; - } - - template - db_pattern_entry& db_pattern_entry::operator=(const db_pattern_entry& other) { - is_var = other.is_var; - constantValue = other.constantValue; - variableName = other.variableName; - return *this; - } - - template - bool db_pattern_entry::isVariable() const { - return is_var; - } - - template - idx_t db_pattern_entry::getConstant() const { - return constantValue; - } - - template - std::string db_pattern_entry::getVariable() const { - return variableName; - } - - template class db_pattern_entry ; - template class db_pattern_entry ; - - template - db_pattern::db_pattern() { - - } - - template - db_pattern::db_pattern(const db_pattern& other) { - for (size_t i = 0; i < other.entries.size(); i++) { - entries.push_back(other.getEntry(i)); - } - } - - template - db_pattern& db_pattern::operator=(const db_pattern& other) { - entries = other.entries; - return *this; - } - - template - int db_pattern::getSize() const { - return entries.size(); - } - - template - const db_pattern_entry& db_pattern::getEntry(int position) const { - return entries[position]; - } - - template - void db_pattern::addEntry(db_pattern_entry& entry) { - entries.push_back(entry); - } - - template - bool db_pattern::isAllConstants() { - for (size_t i = 0; i < entries.size(); i++) - if (entries[i].isVariable()) - return false; - return true; - } - - template class db_pattern ; - template class db_pattern ; - - template - void db_column_index::deleteData() { - if (offsets != nullptr && offsets->data != nullptr) - ALLOC_FREE_TRY(offsets->data, nullptr); - if (indirection != nullptr && indirection->data != nullptr) - ALLOC_FREE_TRY(indirection->data, nullptr); - if (offsets != nullptr) { - free(offsets); - offsets = nullptr; - } - if (indirection != nullptr) { - free(indirection); - indirection = nullptr; - } - } - - template - db_column_index::db_column_index() { - gdf_column* _offsets = (gdf_column*) malloc(sizeof(gdf_column)); - cugraph::detail::gdf_col_set_defaults(_offsets); - gdf_column_view(_offsets, - nullptr, - nullptr, - 0, - std::is_same::value ? GDF_INT32 : GDF_INT64); - offsets = _offsets; - gdf_column* _indirection = (gdf_column*) malloc(sizeof(gdf_column)); - cugraph::detail::gdf_col_set_defaults(_indirection); - gdf_column_view(_indirection, - nullptr, - nullptr, - 0, - std::is_same::value ? GDF_INT32 : GDF_INT64); - indirection = _indirection; - } - - template - db_column_index::db_column_index(gdf_column* _offsets, gdf_column* _indirection) { - offsets = _offsets; - indirection = _indirection; - } - - template - db_column_index::db_column_index(db_column_index&& other) { - offsets = other.offsets; - indirection = other.indirection; - other.offsets = nullptr; - other.indirection = nullptr; - } - - template - db_column_index::~db_column_index() { - deleteData(); - } - - template - db_column_index& db_column_index::operator=(db_column_index&& other) { - offsets = other.offsets; - indirection = other.indirection; - other.offsets = nullptr; - other.indirection = nullptr; - return *this; - } - - template - void db_column_index::resetData(gdf_column* _offsets, gdf_column* _indirection) { - deleteData(); - offsets = _offsets; - indirection = _indirection; - } - - template - gdf_column* db_column_index::getOffsets() { - return offsets; - } - - template - gdf_column* db_column_index::getIndirection() { - return indirection; - } - - template class db_column_index ; - template class db_column_index ; - - template - db_result::db_result() { - dataValid = false; - columnSize = 0; - } - - template - db_result::db_result(db_result&& other) { - dataValid = other.dataValid; - columns = std::move(other.columns); - names = std::move(other.names); - other.dataValid = false; - } - - template - db_result& db_result::operator =(db_result && other) { - dataValid = other.dataValid; - columns = std::move(other.columns); - names = std::move(other.names); - other.dataValid = false; - return *this; - } - - template - db_result::~db_result() { - deleteData(); - } - - template - void db_result::deleteData() { - if (dataValid) - for (size_t i = 0; i < columns.size(); i++) - ALLOC_FREE_TRY(columns[i], nullptr); - } - - template - idx_t db_result::getSize() { - return columnSize; - } - - template - idx_t* db_result::getData(std::string idx) { - if (!dataValid) - throw new std::invalid_argument("Data not valid"); - - idx_t* returnPtr = nullptr; - for (size_t i = 0; i < names.size(); i++) - if (names[i] == idx) - returnPtr = columns[i]; - return returnPtr; - } - - template - void db_result::addColumn(std::string columnName) { - if (dataValid) - throw new std::invalid_argument("Cannot add a column to an allocated result"); - names.push_back(columnName); - } - - template - void db_result::allocateColumns(idx_t size) { - if (dataValid) - throw new std::invalid_argument("Already allocated columns"); - for (size_t i = 0; i < names.size(); i++) { - idx_t* colPtr = nullptr; - ALLOC_TRY(&colPtr, sizeof(idx_t) * size, nullptr); - columns.push_back(colPtr); - } - dataValid = true; - columnSize = size; - } - - template - std::string db_result::toString() { - std::stringstream ss; - ss << "db_result with " << columns.size() << " columns of length " << columnSize << "\n"; +// Define kernel for copying run length encoded values into offset slots. +template +__global__ void offsetsKernel(T runCounts, T* unique, T* counts, T* offsets) { + uint64_t tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < runCounts) + offsets[unique[tid]] = counts[tid]; +} + +template +db_pattern_entry::db_pattern_entry(std::string variable) { + is_var = true; + variableName = variable; +} + +template +db_pattern_entry::db_pattern_entry(idx_t constant) { + is_var = false; + constantValue = constant; +} + +template +db_pattern_entry::db_pattern_entry(const db_pattern_entry& other) { + is_var = other.is_var; + constantValue = other.constantValue; + variableName = other.variableName; +} + +template +db_pattern_entry& db_pattern_entry::operator=(const db_pattern_entry& other) { + is_var = other.is_var; + constantValue = other.constantValue; + variableName = other.variableName; + return *this; +} + +template +bool db_pattern_entry::isVariable() const { + return is_var; +} + +template +idx_t db_pattern_entry::getConstant() const { + return constantValue; +} + +template +std::string db_pattern_entry::getVariable() const { + return variableName; +} + +template class db_pattern_entry; +template class db_pattern_entry; + +template +db_pattern::db_pattern() { + +} + +template +db_pattern::db_pattern(const db_pattern& other) { + for (size_t i = 0; i < other.entries.size(); i++) { + entries.push_back(other.getEntry(i)); + } +} + +template +db_pattern& db_pattern::operator=(const db_pattern& other) { + entries = other.entries; + return *this; +} + +template +int db_pattern::getSize() const { + return entries.size(); +} + +template +const db_pattern_entry& db_pattern::getEntry(int position) const { + return entries[position]; +} + +template +void db_pattern::addEntry(db_pattern_entry& entry) { + entries.push_back(entry); +} + +template +bool db_pattern::isAllConstants() { + for (size_t i = 0; i < entries.size(); i++) + if (entries[i].isVariable()) + return false; + return true; +} + +template class db_pattern; +template class db_pattern; + +template +void db_column_index::deleteData() { + if (offsets != nullptr) { + ALLOC_FREE_TRY(offsets, nullptr); + offsets = nullptr; + offsets_size = 0; + } + if (indirection != nullptr) { + ALLOC_FREE_TRY(indirection, nullptr); + indirection = nullptr; + indirection_size = 0; + } +} + +template +db_column_index::db_column_index() { + offsets = nullptr; + offsets_size = 0; + indirection = nullptr; + indirection_size = 0; +} + +template +db_column_index::db_column_index(idx_t* _offsets, + idx_t _offsets_size, + idx_t* _indirection, + idx_t _indirection_size) { + offsets = _offsets; + offsets_size = _offsets_size; + indirection = _indirection; + indirection_size = _indirection_size; +} + +template +db_column_index::db_column_index(db_column_index&& other) { + offsets = other.offsets; + offsets_size = other.offsets_size; + indirection = other.indirection; + indirection_size = other.indirection_size; + other.offsets = nullptr; + other.offsets_size = 0; + other.indirection = nullptr; + other.indirection_size = 0; +} + +template +db_column_index::~db_column_index() { + deleteData(); +} + +template +db_column_index& db_column_index::operator=(db_column_index&& other) { + offsets = other.offsets; + offsets_size = other.offsets_size; + indirection = other.indirection; + indirection_size = other.indirection_size; + other.offsets = nullptr; + other.offsets_size = 0; + other.indirection = nullptr; + other.indirection_size = 0; + return *this; +} + +template +void db_column_index::resetData(idx_t* _offsets, + idx_t _offsets_size, + idx_t* _indirection, + idx_t _indirection_size) { + deleteData(); + offsets = _offsets; + offsets_size = _offsets_size; + indirection = _indirection; + indirection_size = _indirection_size; +} + +template +idx_t* db_column_index::getOffsets() { + return offsets; +} + +template +idx_t db_column_index::getOffsetsSize() { + return offsets_size; +} + +template +idx_t* db_column_index::getIndirection() { + return indirection; +} + +template +idx_t db_column_index::getIndirectionSize() { + return indirection_size; +} + +template +std::string db_column_index::toString(){ + std::stringstream ss; + ss << "db_column_index:\n"; + ss << "Offsets: "; + idx_t* hostOffsets = (idx_t*)malloc(sizeof(idx_t) * offsets_size); + cudaMemcpy(hostOffsets, offsets, sizeof(idx_t) * offsets_size, cudaMemcpyDefault); + for (idx_t i = 0; i < offsets_size; i++) { + ss << hostOffsets[i] << " "; + } + free(hostOffsets); + ss << "\nIndirection: "; + idx_t* hostIndirection = (idx_t*)malloc(sizeof(idx_t) * indirection_size); + cudaMemcpy(hostIndirection, indirection, sizeof(idx_t) * indirection_size, cudaMemcpyDefault); + for (idx_t i = 0; i < indirection_size; i++) { + ss << hostIndirection[i] << " "; + } + free(hostIndirection); + ss << "\n"; + return ss.str(); +} + +template class db_column_index; +template class db_column_index; + +template +db_result::db_result() { + dataValid = false; + columnSize = 0; +} + +template +db_result::db_result(db_result&& other) { + dataValid = other.dataValid; + columns = std::move(other.columns); + names = std::move(other.names); + other.dataValid = false; +} + +template +db_result& db_result::operator =(db_result&& other) { + dataValid = other.dataValid; + columns = std::move(other.columns); + names = std::move(other.names); + other.dataValid = false; + return *this; +} + +template +db_result::~db_result() { + deleteData(); +} + +template +void db_result::deleteData() { + if (dataValid) for (size_t i = 0; i < columns.size(); i++) - ss << names[i] << " "; + ALLOC_FREE_TRY(columns[i], nullptr); +} + +template +idx_t db_result::getSize() { + return columnSize; +} + +template +idx_t* db_result::getData(std::string idx) { + if (!dataValid) + throw new std::invalid_argument("Data not valid"); + + idx_t* returnPtr = nullptr; + for (size_t i = 0; i < names.size(); i++) + if (names[i] == idx) + returnPtr = columns[i]; + return returnPtr; +} + +template +void db_result::addColumn(std::string columnName) { + if (dataValid) + throw new std::invalid_argument("Cannot add a column to an allocated result"); + names.push_back(columnName); +} + +template +void db_result::allocateColumns(idx_t size) { + if (dataValid) + throw new std::invalid_argument("Already allocated columns"); + for (size_t i = 0; i < names.size(); i++) { + idx_t* colPtr = nullptr; + ALLOC_TRY(&colPtr, sizeof(idx_t) * size, nullptr); + columns.push_back(colPtr); + } + dataValid = true; + columnSize = size; +} + +template +std::string db_result::toString() { + std::stringstream ss; + ss << "db_result with " << columns.size() << " columns of length " << columnSize << "\n"; + for (size_t i = 0; i < columns.size(); i++) + ss << names[i] << " "; + ss << "\n"; + std::vector hostColumns; + for (size_t i = 0; i < columns.size(); i++) { + idx_t* hostColumn = (idx_t*) malloc(sizeof(idx_t) * columnSize); + cudaMemcpy(hostColumn, columns[i], sizeof(idx_t) * columnSize, cudaMemcpyDefault); + hostColumns.push_back(hostColumn); + } + for (idx_t i = 0; i < columnSize; i++) { + for (size_t j = 0; j < hostColumns.size(); j++) + ss << hostColumns[j][i] << " "; ss << "\n"; - std::vector hostColumns; - for (size_t i = 0; i < columns.size(); i++) { - idx_t* hostColumn = (idx_t*)malloc(sizeof(idx_t) * columnSize); - cudaMemcpy(hostColumn, columns[i], sizeof(idx_t) * columnSize, cudaMemcpyDefault); - hostColumns.push_back(hostColumn); - } - for (idx_t i = 0; i < columnSize; i++) { - for (size_t j = 0; j < hostColumns.size(); j++) - ss << hostColumns[j][i] << " "; - ss << "\n"; - } - for (size_t i = 0; i < hostColumns.size(); i++) - free(hostColumns[i]); - return ss.str(); - } - - template class db_result; - template class db_result; - - template - db_table::db_table() { - } - - template - db_table::~db_table() { - for (size_t i = 0; i < columns.size(); i++) { - if (columns[i]->data != nullptr) - ALLOC_FREE_TRY(columns[i]->data, nullptr); - free(columns[i]); + for (size_t i = 0; i < hostColumns.size(); i++) + free(hostColumns[i]); + return ss.str(); +} + +template class db_result; +template class db_result; + +template +db_table::db_table() { + column_size = 0; +} + +template +db_table::~db_table() { + for (size_t i = 0; i < columns.size(); i++) { + if (columns[i] != nullptr) { + ALLOC_FREE_TRY(columns[i], nullptr); + columns[i] = nullptr; } } - - template - void db_table::addColumn(std::string name) { - if (columns.size() > size_t{0} && columns[0]->size > 0) - throw new std::invalid_argument("Can't add a column to a non-empty table"); - - gdf_column* _col = (gdf_column*) malloc(sizeof(gdf_column)); - cugraph::detail::gdf_col_set_defaults(_col); - gdf_column_view(_col, - nullptr, - nullptr, - 0, - std::is_same::value ? GDF_INT32 : GDF_INT64); - columns.push_back(_col); - names.push_back(name); - indices.resize(indices.size() + 1); - } - - template - void db_table::addEntry(db_pattern& pattern) { - if (!pattern.isAllConstants()) - throw new std::invalid_argument("Can't add an entry that isn't all constants"); - if (static_cast(pattern.getSize()) != columns.size()) - throw new std::invalid_argument("Can't add an entry that isn't the right size"); - inputBuffer.push_back(pattern); - } - - template - void db_table::rebuildIndices() { - for (size_t i = 0; i < columns.size(); i++) { - // Copy the column's data to a new array - idx_t size = columns[i]->size; - idx_t* tempColumn; - ALLOC_TRY(&tempColumn, sizeof(idx_t) * size, nullptr); - cudaMemcpy(tempColumn, columns[i]->data, sizeof(idx_t) * size, cudaMemcpyDefault); - - // Construct an array of ascending integers - idx_t* indirection; - ALLOC_TRY(&indirection, sizeof(idx_t) * size, nullptr); - thrust::sequence(rmm::exec_policy(nullptr)->on(nullptr), indirection, indirection + size); - - // Sort the arrays together - thrust::sort_by_key(rmm::exec_policy(nullptr)->on(nullptr), - tempColumn, - tempColumn + size, - indirection); - - // Compute offsets array based on sorted column - idx_t maxId; - cudaMemcpy(&maxId, tempColumn + size - 1, sizeof(idx_t), cudaMemcpyDefault); - idx_t *unique, *counts, *runCount; - ALLOC_TRY(&unique, (maxId + 1) * sizeof(idx_t), nullptr); - ALLOC_TRY(&counts, (maxId + 1) * sizeof(idx_t), nullptr); - ALLOC_TRY(&runCount, sizeof(idx_t), nullptr); - void* tmpStorage = nullptr; - size_t tmpBytes = 0; - cub::DeviceRunLengthEncode::Encode(tmpStorage, - tmpBytes, - tempColumn, - unique, - counts, - runCount, - size); - ALLOC_TRY(&tmpStorage, tmpBytes, nullptr); - cub::DeviceRunLengthEncode::Encode(tmpStorage, - tmpBytes, - tempColumn, - unique, - counts, - runCount, - size); - ALLOC_FREE_TRY(tmpStorage, nullptr); - idx_t runCount_h; - cudaMemcpy(&runCount_h, runCount, sizeof(idx_t), cudaMemcpyDefault); - idx_t* offsets; - - // Allocating the new offsets array - ALLOC_TRY(&offsets, (maxId + 2) * sizeof(idx_t), nullptr); - - // Filling values in offsets array from the encoded run lengths - int threadsPerBlock = 1024; - int numBlocks = (runCount_h + threadsPerBlock - 1) / threadsPerBlock; - offsetsKernel<<>>(runCount_h, unique, counts, offsets); - CUDA_CHECK_LAST(); - - // Taking the exclusive scan of the run lengths to get the final offsets. - thrust::exclusive_scan(rmm::exec_policy(nullptr)->on(nullptr), - offsets, - offsets + maxId + 2, - offsets); - ALLOC_FREE_TRY(tempColumn, nullptr); - ALLOC_FREE_TRY(unique, nullptr); - ALLOC_FREE_TRY(counts, nullptr); - ALLOC_FREE_TRY(runCount, nullptr); - - // Assign new offsets array and indirection vector to index - gdf_column* offsetsCol = (gdf_column*) malloc(sizeof(gdf_column)); - cugraph::detail::gdf_col_set_defaults(offsetsCol); - gdf_column_view(offsetsCol, - offsets, - nullptr, - maxId + 2, - std::is_same::value ? GDF_INT32 : GDF_INT64); - - gdf_column* indirectionCol = (gdf_column*) malloc(sizeof(gdf_column)); - cugraph::detail::gdf_col_set_defaults(indirectionCol); - gdf_column_view(indirectionCol, - indirection, - nullptr, - size, - std::is_same::value ? GDF_INT32 : GDF_INT64); - - indices[i].resetData(offsetsCol, indirectionCol); - } - } - - template - void db_table::flush_input() { - if (inputBuffer.size() == size_t{0}) - return; - idx_t tempSize = inputBuffer.size(); - std::vector tempColumns; - for (size_t i = 0; i < columns.size(); i++) { - tempColumns.push_back((idx_t*) malloc(sizeof(idx_t) * tempSize)); - for (idx_t j = 0; j < tempSize; j++) { - tempColumns.back()[j] = inputBuffer[j].getEntry(i).getConstant(); - } - } - inputBuffer.clear(); - idx_t currentSize = columns[0]->size; - idx_t newSize = currentSize + tempSize; - std::vector newColumns; - for (size_t i = 0; i < columns.size(); i++) { - idx_t* newCol; - ALLOC_TRY(&newCol, sizeof(idx_t) * newSize, nullptr); - newColumns.push_back(newCol); - } - for (size_t i = 0; i < columns.size(); i++) { - if (currentSize > 0) - cudaMemcpy(newColumns[i], columns[i]->data, sizeof(idx_t) * currentSize, cudaMemcpyDefault); - cudaMemcpy(newColumns[i] + currentSize, - tempColumns[i], - sizeof(idx_t) * tempSize, - cudaMemcpyDefault); - free(tempColumns[i]); - if (columns[i]->data != nullptr) - ALLOC_FREE_TRY(columns[i]->data, nullptr); - columns[i]->data = newColumns[i]; - columns[i]->size = newSize; +} + +template +void db_table::addColumn(std::string name) { + if (columns.size() > size_t { 0 } && column_size > 0) + throw new std::invalid_argument("Can't add a column to a non-empty table"); + + idx_t* _col = nullptr; + columns.push_back(_col); + names.push_back(name); + indices.resize(indices.size() + 1); +} + +template +void db_table::addEntry(db_pattern& pattern) { + if (!pattern.isAllConstants()) + throw new std::invalid_argument("Can't add an entry that isn't all constants"); + if (static_cast(pattern.getSize()) != columns.size()) + throw new std::invalid_argument("Can't add an entry that isn't the right size"); + inputBuffer.push_back(pattern); +} + +template +void db_table::rebuildIndices() { + for (size_t i = 0; i < columns.size(); i++) { + // Copy the column's data to a new array + idx_t size = column_size; + idx_t* tempColumn; + ALLOC_TRY(&tempColumn, sizeof(idx_t) * size, nullptr); + cudaMemcpy(tempColumn, columns[i], sizeof(idx_t) * size, cudaMemcpyDefault); + + // Construct an array of ascending integers + idx_t* indirection; + ALLOC_TRY(&indirection, sizeof(idx_t) * size, nullptr); + thrust::sequence(rmm::exec_policy(nullptr)->on(nullptr), indirection, indirection + size); + + // Sort the arrays together + thrust::sort_by_key(rmm::exec_policy(nullptr)->on(nullptr), + tempColumn, + tempColumn + size, + indirection); + + // Compute offsets array based on sorted column + idx_t maxId; + cudaMemcpy(&maxId, tempColumn + size - 1, sizeof(idx_t), cudaMemcpyDefault); + idx_t* offsets; + ALLOC_TRY(&offsets, (maxId + 2) * sizeof(idx_t), nullptr); + thrust::lower_bound(rmm::exec_policy(nullptr)->on(nullptr), + tempColumn, + tempColumn + size, + thrust::counting_iterator(0), + thrust::counting_iterator(maxId + 2), + offsets); + + // Clean up temporary allocations + ALLOC_FREE_TRY(tempColumn, nullptr); + + // Assign new offsets array and indirection vector to index + indices[i].resetData(offsets, maxId + 2, indirection, size); + } +} + +template +void db_table::flush_input() { + if (inputBuffer.size() == size_t { 0 }) + return; + idx_t tempSize = inputBuffer.size(); + std::vector tempColumns; + for (size_t i = 0; i < columns.size(); i++) { + tempColumns.push_back((idx_t*) malloc(sizeof(idx_t) * tempSize)); + for (idx_t j = 0; j < tempSize; j++) { + tempColumns.back()[j] = inputBuffer[j].getEntry(i).getConstant(); } - - rebuildIndices(); } - - template - std::string db_table::toString() { - idx_t columnSize = 0; - if (columns.size() > 0) - columnSize = columns[0]->size; - std::stringstream ss; - ss << "Table with " << columns.size() << " columns of length " << columnSize << "\n"; - for (size_t i = 0; i < names.size(); i++) - ss << names[i] << " "; + inputBuffer.clear(); + idx_t currentSize = column_size; + idx_t newSize = currentSize + tempSize; + std::vector newColumns; + for (size_t i = 0; i < columns.size(); i++) { + idx_t* newCol; + ALLOC_TRY(&newCol, sizeof(idx_t) * newSize, nullptr); + newColumns.push_back(newCol); + } + for (size_t i = 0; i < columns.size(); i++) { + if (currentSize > 0) + cudaMemcpy(newColumns[i], columns[i], sizeof(idx_t) * currentSize, cudaMemcpyDefault); + cudaMemcpy(newColumns[i] + currentSize, + tempColumns[i], + sizeof(idx_t) * tempSize, + cudaMemcpyDefault); + free(tempColumns[i]); + if (columns[i] != nullptr) + ALLOC_FREE_TRY(columns[i], nullptr); + columns[i] = newColumns[i]; + column_size = newSize; + } + + rebuildIndices(); +} + +template +std::string db_table::toString() { + idx_t columnSize = 0; + if (columns.size() > 0) + columnSize = column_size; + std::stringstream ss; + ss << "Table with " << columns.size() << " columns of length " << columnSize << "\n"; + for (size_t i = 0; i < names.size(); i++) + ss << names[i] << " "; + ss << "\n"; + std::vector hostColumns; + for (size_t i = 0; i < columns.size(); i++) { + idx_t* hostColumn = (idx_t*) malloc(sizeof(idx_t) * columnSize); + cudaMemcpy(hostColumn, columns[i], sizeof(idx_t) * columnSize, cudaMemcpyDefault); + hostColumns.push_back(hostColumn); + } + for (idx_t i = 0; i < columnSize; i++) { + for (size_t j = 0; j < hostColumns.size(); j++) + ss << hostColumns[j][i] << " "; ss << "\n"; - std::vector hostColumns; - for (size_t i = 0; i < columns.size(); i++) { - idx_t* hostColumn = (idx_t*)malloc(sizeof(idx_t) * columnSize); - cudaMemcpy(hostColumn, columns[i]->data, sizeof(idx_t) * columnSize, cudaMemcpyDefault); - hostColumns.push_back(hostColumn); - } - for (idx_t i = 0; i < columnSize; i++) { - for (size_t j = 0; j < hostColumns.size(); j++) - ss << hostColumns[j][i] << " "; - ss << "\n"; - } - for (size_t i = 0; i < hostColumns.size(); i++) - free(hostColumns[i]); - return ss.str(); } - - template - db_column_index& db_table::getIndex(int idx) { - return indices[idx]; - } - - template - gdf_column* db_table::getColumn(int idx) { - return columns[idx]; - } - - template class db_table ; - template class db_table ; - - template - db_object::db_object() { - next_id = 0; - relationshipsTable.addColumn("begin"); - relationshipsTable.addColumn("end"); - relationshipsTable.addColumn("type"); - relationshipPropertiesTable.addColumn("id"); - relationshipPropertiesTable.addColumn("name"); - relationshipPropertiesTable.addColumn("value"); - } - - template - std::string db_object::query(std::string query) { - return ""; - } - - template class db_object ; - template class db_object ; -} } //namespace + for (size_t i = 0; i < hostColumns.size(); i++) + free(hostColumns[i]); + return ss.str(); +} + +template +db_column_index& db_table::getIndex(int idx) { + return indices[idx]; +} + +template +idx_t* db_table::getColumn(int idx) { + return columns[idx]; +} + +template class db_table; +template class db_table; + +template +db_object::db_object() { + next_id = 0; + relationshipsTable.addColumn("begin"); + relationshipsTable.addColumn("end"); + relationshipsTable.addColumn("type"); + relationshipPropertiesTable.addColumn("id"); + relationshipPropertiesTable.addColumn("name"); + relationshipPropertiesTable.addColumn("value"); +} + +template +std::string db_object::query(std::string query) { + return ""; +} + +template class db_object; +template class db_object; +} +} //namespace diff --git a/cpp/src/db/db_object.cuh b/cpp/src/db/db_object.cuh index 14dd9d5ee25..2dede1a337e 100644 --- a/cpp/src/db/db_object.cuh +++ b/cpp/src/db/db_object.cuh @@ -68,20 +68,31 @@ namespace db { */ template class db_column_index { - gdf_column* offsets; - gdf_column* indirection; + idx_t* offsets; + idx_t* indirection; + idx_t offsets_size; + idx_t indirection_size; + void deleteData(); public: db_column_index(); - db_column_index(gdf_column* offsets, gdf_column* indirection); + db_column_index(idx_t* offsets, idx_t offsets_size, idx_t* indirection, idx_t indirection_size); db_column_index(const db_column_index& other) = delete; db_column_index(db_column_index&& other); ~db_column_index(); db_column_index& operator=(const db_column_index& other) = delete; db_column_index& operator=(db_column_index&& other); - void resetData(gdf_column* offsets, gdf_column* indirection); - gdf_column* getOffsets(); - gdf_column* getIndirection(); + void resetData(idx_t* offsets, idx_t offsets_size, idx_t* indirection, idx_t indirection_size); + idx_t* getOffsets(); + idx_t getOffsetsSize(); + idx_t* getIndirection(); + idx_t getIndirectionSize(); + + /** + * For debugging purposes only. + * @return Human readable representation + */ + std::string toString(); }; /** @@ -119,7 +130,8 @@ namespace db { */ template class db_table { - std::vector columns; + std::vector columns; + idx_t column_size; std::vector names; std::vector> inputBuffer; std::vector> indices; @@ -151,7 +163,8 @@ namespace db { */ std::string toString(); db_column_index& getIndex(int idx); - gdf_column* getColumn(int idx); + idx_t* getColumn(int idx); + idx_t getColumnSize(); }; /** diff --git a/cpp/src/db/db_operators.cu b/cpp/src/db/db_operators.cu index 2b20411ce7e..69fecf4a792 100644 --- a/cpp/src/db/db_operators.cu +++ b/cpp/src/db/db_operators.cu @@ -149,10 +149,6 @@ namespace cugraph { idx_t valB = columnB[row_id]; idx_t valC = columnC[row_id]; - // Debugging output -// printf("RowId: %d, valA: %d, valB: %d, valC: %d\n", row_id, valA, valB, valC); -// printf("PatternA: %d, PatternB: %d, PatternC: %d\n", patternA, patternB, patternC); - // Compare the row values with constants in the pattern bool matchA = outputA != nullptr ? true : patternA == valA; bool matchB = outputB != nullptr ? true : patternB == valB; @@ -215,7 +211,7 @@ namespace cugraph { } else { // Making a sequence of values from zero to n where n is the highest ID present in the index. - idx_t highestId = theIndex.getOffsets()->size - 2; + idx_t highestId = theIndex.getOffsetsSize() - 2; ALLOC_TRY(&frontier_ptr, sizeof(idx_t) * (highestId + 1), nullptr); thrust::sequence(rmm::exec_policy(nullptr)->on(nullptr), frontier_ptr, @@ -225,11 +221,11 @@ namespace cugraph { } // Collect all the pointers needed to run the main kernel - idx_t* columnA = (idx_t*)table.getColumn(0)->data; - idx_t* columnB = (idx_t*)table.getColumn(1)->data; - idx_t* columnC = (idx_t*)table.getColumn(2)->data; - idx_t* offsets = (idx_t*)theIndex.getOffsets()->data; - idx_t* indirection = (idx_t*)theIndex.getIndirection()->data; + idx_t* columnA = table.getColumn(0); + idx_t* columnB = table.getColumn(1); + idx_t* columnC = table.getColumn(2); + idx_t* offsets = theIndex.getOffsets(); + idx_t* indirection = theIndex.getIndirection(); // Load balance the input idx_t *exsum_degree = nullptr; @@ -249,9 +245,6 @@ namespace cugraph { idx_t output_size; cudaMemcpy(&output_size, &exsum_degree[frontierSize], sizeof(idx_t), cudaMemcpyDefault); - // Debugging output -// std::cout << "OutputSize = " << output_size << "\n"; - idx_t num_blocks = (output_size + FIND_MATCHES_BLOCK_SIZE - 1) / FIND_MATCHES_BLOCK_SIZE; idx_t *block_bucket_offsets = nullptr; ALLOC_TRY(&block_bucket_offsets, sizeof(idx_t) * (num_blocks + 1), nullptr); @@ -320,44 +313,6 @@ namespace cugraph { patternB, patternC); - // Debugging output -// if (outputA != nullptr) { -// idx_t* outputA_h = (idx_t*)malloc(sizeof(idx_t) * output_size); -// cudaMemcpy(outputA_h, outputA, sizeof(idx_t)*output_size, cudaMemcpyDefault); -// std::cout << "OutputA: "; -// for (int i = 0; i < output_size; i++) -// std::cout << outputA_h[i] << " "; -// std::cout << "\n"; -// free(outputA_h); -// } -// if (outputB != nullptr) { -// idx_t* outputB_h = (idx_t*) malloc(sizeof(idx_t) * output_size); -// cudaMemcpy(outputB_h, outputB, sizeof(idx_t) * output_size, cudaMemcpyDefault); -// std::cout << "OutputB: "; -// for (int i = 0; i < output_size; i++) -// std::cout << outputB_h[i] << " "; -// std::cout << "\n"; -// free(outputB_h); -// } -// if (outputC != nullptr) { -// idx_t* outputC_h = (idx_t*) malloc(sizeof(idx_t) * output_size); -// cudaMemcpy(outputC_h, outputC, sizeof(idx_t) * output_size, cudaMemcpyDefault); -// std::cout << "OutputC: "; -// for (int i = 0; i < output_size; i++) -// std::cout << outputC_h[i] << " "; -// std::cout << "\n"; -// free(outputC_h); -// } -// if (outputD != nullptr) { -// idx_t* outputD_h = (idx_t*) malloc(sizeof(idx_t) * output_size); -// cudaMemcpy(outputD_h, outputD, sizeof(idx_t) * output_size, cudaMemcpyDefault); -// std::cout << "OutputD: "; -// for (int i = 0; i < output_size; i++) -// std::cout << outputD_h[i] << " "; -// std::cout << "\n"; -// free(outputD_h); -// } - // Get the non-null output columns std::vector columns; std::vector names; @@ -409,6 +364,7 @@ namespace cugraph { output_size); idx_t compactSize_h; cudaMemcpy(&compactSize_h, compactSize_d, sizeof(idx_t), cudaMemcpyDefault); + for (size_t i = 1; i < columns.size(); i++) { col_ptr = columns[i]; cub::DeviceSelect::Flagged(tempSpace, diff --git a/cpp/tests/db/find_matches_test.cu b/cpp/tests/db/find_matches_test.cu index 0b5cb6656d8..f2bc9f93aa3 100644 --- a/cpp/tests/db/find_matches_test.cu +++ b/cpp/tests/db/find_matches_test.cu @@ -53,62 +53,14 @@ public: }; TEST_F(Test_FindMatches, verifyIndices) { - int32_t* offsets_d = reinterpret_cast(table.getIndex(0).getOffsets()->data); - int32_t offsetsSize = table.getIndex(0).getOffsets()->size; - int32_t* indirection_d = reinterpret_cast(table.getIndex(0).getIndirection()->data); - int32_t indirectionSize = table.getIndex(0).getIndirection()->size; - int32_t* offsets_h = new int32_t[offsetsSize]; - int32_t* indirection_h = new int32_t[indirectionSize]; - cudaMemcpy(offsets_h, offsets_d, sizeof(int32_t) * offsetsSize, cudaMemcpyDefault); - cudaMemcpy(indirection_h, indirection_d, sizeof(int32_t) * indirectionSize, cudaMemcpyDefault); - std::cout << "Offsets[0]: "; - for (int i = 0; i < offsetsSize; i++) - std::cout << offsets_h[i] << " "; - std::cout << "\n"; - std::cout << "Indirection[0]: "; - for (int i = 0; i < indirectionSize; i++) - std::cout << indirection_h[i] << " "; - std::cout << "\n"; - delete[] offsets_h; - delete[] indirection_h; - - offsets_d = reinterpret_cast(table.getIndex(1).getOffsets()->data); - offsetsSize = table.getIndex(1).getOffsets()->size; - indirection_d = reinterpret_cast(table.getIndex(1).getIndirection()->data); - indirectionSize = table.getIndex(1).getIndirection()->size; - offsets_h = new int32_t[offsetsSize]; - indirection_h = new int32_t[indirectionSize]; - cudaMemcpy(offsets_h, offsets_d, sizeof(int32_t) * offsetsSize, cudaMemcpyDefault); - cudaMemcpy(indirection_h, indirection_d, sizeof(int32_t) * indirectionSize, cudaMemcpyDefault); - std::cout << "Offsets[1]: "; - for (int i = 0; i < offsetsSize; i++) - std::cout << offsets_h[i] << " "; - std::cout << "\n"; - std::cout << "Indirection[1]: "; - for (int i = 0; i < indirectionSize; i++) - std::cout << indirection_h[i] << " "; - std::cout << "\n"; - delete[] offsets_h; - delete[] indirection_h; - - offsets_d = reinterpret_cast(table.getIndex(2).getOffsets()->data); - offsetsSize = table.getIndex(2).getOffsets()->size; - indirection_d = reinterpret_cast(table.getIndex(2).getIndirection()->data); - indirectionSize = table.getIndex(2).getIndirection()->size; - offsets_h = new int32_t[offsetsSize]; - indirection_h = new int32_t[indirectionSize]; - cudaMemcpy(offsets_h, offsets_d, sizeof(int32_t) * offsetsSize, cudaMemcpyDefault); - cudaMemcpy(indirection_h, indirection_d, sizeof(int32_t) * indirectionSize, cudaMemcpyDefault); - std::cout << "Offsets[2]: "; - for (int i = 0; i < offsetsSize; i++) - std::cout << offsets_h[i] << " "; - std::cout << "\n"; - std::cout << "Indirection[2]: "; - for (int i = 0; i < indirectionSize; i++) - std::cout << indirection_h[i] << " "; - std::cout << "\n"; - delete[] offsets_h; - delete[] indirection_h; + insertConstantEntry(0, 1, 1); + insertConstantEntry(2, 0, 1); + table.flush_input(); + + std::cout << table.toString(); + std::cout << "Index[0]: " << table.getIndex(0).toString(); + std::cout << "Index[1]: " << table.getIndex(1).toString(); + std::cout << "Index[2]: " << table.getIndex(2).toString(); } TEST_F(Test_FindMatches, firstTest){ @@ -132,12 +84,16 @@ TEST_F(Test_FindMatches, firstTest){ delete[] resultB; } -/* + TEST_F(Test_FindMatches, secondTest) { insertConstantEntry(0, 1, 1); insertConstantEntry(2, 0, 1); table.flush_input(); + std::cout << table.toString() << "\n\n"; + + std::cout << table.getIndex(2).toString() << "\n"; + cugraph::db::db_pattern q; cugraph::db::db_pattern_entry q1(0); cugraph::db::db_pattern_entry q2("a"); @@ -148,14 +104,14 @@ TEST_F(Test_FindMatches, secondTest) { cugraph::db::db_result result = cugraph::db::findMatches(q, table, nullptr, 2); + std::cout << result.toString(); + ASSERT_EQ(result.getSize(), 2); int32_t* resultA = new int32_t[result.getSize()]; int32_t* resultB = new int32_t[result.getSize()]; cudaMemcpy(resultA, result.getData("a"), sizeof(int32_t) * result.getSize(), cudaMemcpyDefault); cudaMemcpy(resultB, result.getData("b"), sizeof(int32_t) * result.getSize(), cudaMemcpyDefault); - std::cout << result.toString(); - ASSERT_EQ(resultA[0], 1); ASSERT_EQ(resultB[0], 1); ASSERT_EQ(resultA[1], 1); @@ -164,7 +120,7 @@ TEST_F(Test_FindMatches, secondTest) { delete[] resultA; delete[] resultB; } -*/ + TEST_F(Test_FindMatches, thirdTest) { insertConstantEntry(1, 1, 2); insertConstantEntry(2, 1, 2); @@ -229,7 +185,7 @@ TEST_F(Test_FindMatches, fourthTest) { delete[] resultA; delete[] resultR; } -/* + TEST_F(Test_FindMatches, fifthTest) { insertConstantEntry(0, 1, 3); insertConstantEntry(0, 2, 1); @@ -261,7 +217,7 @@ TEST_F(Test_FindMatches, fifthTest) { delete[] resultA; delete[] resultB; } -*/ + int main( int argc, char** argv ) { rmmInitialize(nullptr); @@ -269,4 +225,4 @@ int main( int argc, char** argv ) int rc = RUN_ALL_TESTS(); rmmFinalize(); return rc; -} \ No newline at end of file +}