From 29721e48c1afa53560104c060df4f616d7606011 Mon Sep 17 00:00:00 2001 From: James Wyles Date: Mon, 22 Jul 2019 11:53:05 -0600 Subject: [PATCH 1/8] Added debug output --- cpp/src/snmg/COO2CSR/COO2CSR.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/snmg/COO2CSR/COO2CSR.cu b/cpp/src/snmg/COO2CSR/COO2CSR.cu index 7247eb55587..f4beb8bd8c7 100644 --- a/cpp/src/snmg/COO2CSR/COO2CSR.cu +++ b/cpp/src/snmg/COO2CSR/COO2CSR.cu @@ -296,6 +296,10 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, myRowCount += otherRowCounts[i]; } + std::stringstream ss; + ss << "myRowCount=" << myRowCount << " myEdgeCount=" << myEdgeCount; + serializeMessage(env, ss.str()); + // Each thread allocates space to receive their rows from others idx_t *cooRowNew, *cooColNew; val_t *cooValNew; From d731ed00a93ade9e411dec68cee223fa390a7f2b Mon Sep 17 00:00:00 2001 From: James Wyles Date: Mon, 22 Jul 2019 13:13:13 -0600 Subject: [PATCH 2/8] Changed COO2CSR tests to use transpose --- cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu b/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu index 5b9ec83ad3e..d6c54f10d96 100644 --- a/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu +++ b/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu @@ -89,7 +89,7 @@ public: std::vector csrVal(nnz, 0.0); // Read - ASSERT_EQ( (mm_to_coo(fpin, 1, nnz, &cooRowInd[0], &cooColInd[0], NULL, NULL)) , 0)<< "could not read matrix data"<< "\n"; + ASSERT_EQ( (mm_to_coo(fpin, 1, nnz, &cooColInd[0], &cooRowInd[0], NULL, NULL)) , 0)<< "could not read matrix data"<< "\n"; ASSERT_EQ(fclose(fpin), 0); //ASSERT_EQ( (coo_to_csr (m, m, nnz, &cooRowInd[0], &cooColInd[0], NULL, NULL, &csrRowPtr[0], NULL, NULL, NULL)), 0) << "could not covert COO to CSR "<< "\n"; std::vector cooRowInd_tmp(cooRowInd); From 90d9a6caa5e5e86ad34195e186355e6b5e64b06b Mon Sep 17 00:00:00 2001 From: James Wyles Date: Mon, 22 Jul 2019 14:09:10 -0600 Subject: [PATCH 3/8] Added new tests which fail --- .../tests/snmg_coo2csr/snmg_coo2csr_test.cu | 238 +++++++++++++++++- 1 file changed, 237 insertions(+), 1 deletion(-) diff --git a/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu b/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu index d6c54f10d96..300e88686de 100644 --- a/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu +++ b/cpp/src/tests/snmg_coo2csr/snmg_coo2csr_test.cu @@ -89,7 +89,7 @@ public: std::vector csrVal(nnz, 0.0); // Read - ASSERT_EQ( (mm_to_coo(fpin, 1, nnz, &cooColInd[0], &cooRowInd[0], NULL, NULL)) , 0)<< "could not read matrix data"<< "\n"; + ASSERT_EQ( (mm_to_coo(fpin, 1, nnz, &cooRowInd[0], &cooColInd[0], NULL, NULL)) , 0)<< "could not read matrix data"<< "\n"; ASSERT_EQ(fclose(fpin), 0); //ASSERT_EQ( (coo_to_csr (m, m, nnz, &cooRowInd[0], &cooColInd[0], NULL, NULL, &csrRowPtr[0], NULL, NULL, NULL)), 0) << "could not covert COO to CSR "<< "\n"; std::vector cooRowInd_tmp(cooRowInd); @@ -276,6 +276,242 @@ INSTANTIATE_TEST_CASE_P(mtx_test, Tests_MGcoo2csr, MGcoo2csr_Usecase("test/datasets/web-Google.mtx"), MGcoo2csr_Usecase("test/datasets/wiki-Talk.mtx"))); +class Tests_MGcoo2csrTrans: public ::testing::TestWithParam { +public: + Tests_MGcoo2csrTrans() { + } + static void SetupTestCase() { + } + static void TearDownTestCase() { + } + virtual void SetUp() { + } + virtual void TearDown() { + } + + static std::vector mgspmv_time; + + template + void run_current_test(const MGcoo2csr_Usecase& param) { + const ::testing::TestInfo* const test_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + std::stringstream ss; + std::string test_id = std::string(test_info->test_case_name()) + std::string(".") + + std::string(test_info->name()) + std::string("_") + getFileName(param.matrix_file) + + std::string("_") + ss.str().c_str(); + std::cout << test_id << "\n"; + int m, k, nnz, n_gpus; + MM_typecode mc; + gdf_error status; + + double t; + + FILE* fpin = fopen(param.matrix_file.c_str(), "r"); + + if (!fpin) { + std::cout << "Could not open file: " << param.matrix_file << "\n"; + FAIL(); + } + + ASSERT_EQ(mm_properties(fpin, 1, &mc, &m, &k, &nnz),0)<< "could not read Matrix Market file properties"<< "\n"; + ASSERT_TRUE(mm_is_matrix(mc)); + ASSERT_TRUE(mm_is_coordinate(mc)); + ASSERT_FALSE(mm_is_complex(mc)); + ASSERT_FALSE(mm_is_skew(mc)); + + // Allocate memory on host + std::vector cooRowInd(nnz), cooColInd(nnz), csrColInd(nnz), csrRowPtr(m + 1); + std::vector degree_h(m, 0.0), degree_ref(m, 0.0); + std::vector csrVal(nnz, 0.0); + + // Read + ASSERT_EQ( (mm_to_coo(fpin, 1, nnz, &cooColInd[0], &cooRowInd[0], NULL, NULL)) , 0)<< "could not read matrix data"<< "\n"; + ASSERT_EQ(fclose(fpin), 0); + //ASSERT_EQ( (coo_to_csr (m, m, nnz, &cooRowInd[0], &cooColInd[0], NULL, NULL, &csrRowPtr[0], NULL, NULL, NULL)), 0) << "could not covert COO to CSR "<< "\n"; + std::vector cooRowInd_tmp(cooRowInd); + std::vector cooColInd_tmp(cooColInd); + coo2csr(cooRowInd_tmp, cooColInd_tmp, csrRowPtr, csrColInd); + + CUDA_RT_CALL(cudaGetDeviceCount(&n_gpus)); + std::vector v_loc(n_gpus), e_loc(n_gpus), part_offset(n_gpus + 1), part_offset_r(n_gpus + + 1); + void* comm1; + + if (nnz < 1200000000) { +#pragma omp parallel num_threads(1) + { + //omp_set_num_threads(n_gpus); + auto i = omp_get_thread_num(); + auto p = omp_get_num_threads(); + CUDA_RT_CALL(cudaSetDevice(i)); + +#ifdef SNMG_VERBOSE +#pragma omp master + { + std::cout << "Number of GPUs : "<< n_gpus <(csr_off, csr_ind, col_off, col_ind)); + } + + gdf_col_delete(col_off); + gdf_col_delete(col_ind); + gdf_col_delete(col_val); + gdf_col_delete(csr_off); + gdf_col_delete(csr_ind); + gdf_col_delete(csr_val); + gdf_col_delete(coo_row); + gdf_col_delete(coo_col); + gdf_col_delete(coo_val); + } + } + if (n_gpus > 1) + { + // Only using the 4 fully connected GPUs on DGX1 + if (n_gpus == 8) + n_gpus = 4; + +#pragma omp parallel num_threads(n_gpus) + { + //omp_set_num_threads(n_gpus); + auto i = omp_get_thread_num(); + auto p = omp_get_num_threads(); + CUDA_RT_CALL(cudaSetDevice(i)); + +#ifdef SNMG_VERBOSE +#pragma omp master + { + std::cout << "Number of GPUs : "<< n_gpus <(csr_off, csr_ind, col_off, col_ind)); + } + + gdf_col_delete(col_off); + gdf_col_delete(col_ind); + gdf_col_delete(col_val); + gdf_col_delete(csr_off); + gdf_col_delete(csr_ind); + gdf_col_delete(csr_val); + gdf_col_delete(coo_row); + gdf_col_delete(coo_col); + gdf_col_delete(coo_val); + } + } + std::cout << std::endl; + } +}; + +TEST_P(Tests_MGcoo2csrTrans, CheckInt32_floatmtx) { + run_current_test(GetParam()); +} + +TEST_P(Tests_MGcoo2csrTrans, CheckInt32_doublemtx) { + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P(mtx_test, Tests_MGcoo2csrTrans, + ::testing::Values(MGcoo2csr_Usecase("test/datasets/karate.mtx"), + MGcoo2csr_Usecase("test/datasets/netscience.mtx"), + MGcoo2csr_Usecase("test/datasets/cit-Patents.mtx"), + MGcoo2csr_Usecase("test/datasets/webbase-1M.mtx"), + MGcoo2csr_Usecase("test/datasets/web-Google.mtx"), + MGcoo2csr_Usecase("test/datasets/wiki-Talk.mtx"))); + class Tests_MGcoo2csr_hibench: public ::testing::TestWithParam { public: Tests_MGcoo2csr_hibench() { From f6dbca93e5e0955cc4fa3207386c472ec4c6a1ef Mon Sep 17 00:00:00 2001 From: James Wyles Date: Mon, 22 Jul 2019 15:39:36 -0600 Subject: [PATCH 4/8] Fix? --- cpp/src/snmg/COO2CSR/COO2CSR.cu | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/cpp/src/snmg/COO2CSR/COO2CSR.cu b/cpp/src/snmg/COO2CSR/COO2CSR.cu index f4beb8bd8c7..7e27a761582 100644 --- a/cpp/src/snmg/COO2CSR/COO2CSR.cu +++ b/cpp/src/snmg/COO2CSR/COO2CSR.cu @@ -245,6 +245,7 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, auto zippy = thrust::make_zip_iterator(thrust::make_tuple(cooRowTemp, cooColTemp)); thrust::sort(rmm::exec_policy(nullptr)->on(nullptr), zippy, zippy + size); } + cudaDeviceSynchronize(); cudaCheckError(); // Each thread determines the count of rows it needs to transfer to each other thread @@ -255,7 +256,7 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, ALLOC_TRY(&endPositions, sizeof(idx_t) * (p - 1), nullptr); for (int j = 0; j < p - 1; j++) { idx_t endVertexId = part_offsets[j + 1]; - if (endVertexId < localMinId) { + if (endVertexId <= localMinId) { // Write out zero for this position writeSingleValue<<<1, 256>>>(endPositions + j, static_cast(0)); } @@ -263,7 +264,7 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, // Write out size for this position writeSingleValue<<<1, 256>>>(endPositions + j, size); } - else if (endVertexId >= localMinId && endVertexId < localMaxId) { + else if (endVertexId > localMinId && endVertexId < localMaxId) { dim3 nthreads, nblocks; nthreads.x = min(size, static_cast(CUDA_MAX_KERNEL_THREADS)); nthreads.y = 1; @@ -300,6 +301,24 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, ss << "myRowCount=" << myRowCount << " myEdgeCount=" << myEdgeCount; serializeMessage(env, ss.str()); + ss.str(""); + ss << "positions: ["; + for (int j = 0; j < p + 1; j++) + ss << " " << positions[j]; + ss << "]"; + serializeMessage(env, ss.str()); + + ss.str(""); + ss << "part_offsets: ["; + for (int j=0; j < p + 1; j++) + ss << " " << part_offsets[j]; + ss << "]"; + serializeMessage(env, ss.str()); + + ss.str(""); + ss << "localMinId=" << localMinId << " localMaxId=" << localMaxId; + serializeMessage(env, ss.str()); + // Each thread allocates space to receive their rows from others idx_t *cooRowNew, *cooColNew; val_t *cooValNew; From bd465550868bd5522b81d8de08a93ac71f974fc7 Mon Sep 17 00:00:00 2001 From: James Wyles Date: Tue, 23 Jul 2019 13:12:52 -0600 Subject: [PATCH 5/8] Fix? --- cpp/src/snmg/COO2CSR/COO2CSR.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/snmg/COO2CSR/COO2CSR.cu b/cpp/src/snmg/COO2CSR/COO2CSR.cu index 7e27a761582..4df6c0c5c6e 100644 --- a/cpp/src/snmg/COO2CSR/COO2CSR.cu +++ b/cpp/src/snmg/COO2CSR/COO2CSR.cu @@ -190,7 +190,7 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, // Each thread searches the global source node counts prefix sum to find the start of its vertex ID range idx_t myStartVertex = 0; if (i != 0) { - idx_t edgeCount = (globalEdgeCount / p) * i; + unsigned long long int edgeCount = (globalEdgeCount / p) * i; idx_t* vertexRangeStart; ALLOC_TRY(&vertexRangeStart, sizeof(idx_t), nullptr); dim3 nthreads, nblocks; From 8fe39da7e0a660fc47da23d02df09d94db74376b Mon Sep 17 00:00:00 2001 From: James Wyles Date: Tue, 23 Jul 2019 13:22:56 -0600 Subject: [PATCH 6/8] Fix? --- cpp/src/snmg/COO2CSR/COO2CSR.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/snmg/COO2CSR/COO2CSR.cu b/cpp/src/snmg/COO2CSR/COO2CSR.cu index 4df6c0c5c6e..4b840ee5cba 100644 --- a/cpp/src/snmg/COO2CSR/COO2CSR.cu +++ b/cpp/src/snmg/COO2CSR/COO2CSR.cu @@ -68,7 +68,7 @@ void serializeMessage(cugraph::SNMGinfo& env, std::string message){ template __global__ void __launch_bounds__(CUDA_MAX_KERNEL_THREADS) -findStartRange(idx_t n, idx_t* result, idx_t edgeCount, val_t* scanned) { +findStartRange(idx_t n, idx_t* result, val_t edgeCount, val_t* scanned) { for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) if (scanned[i] < edgeCount && scanned[i + 1] >= edgeCount) *result = i + 1; @@ -213,7 +213,7 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, cudaCheckError(); #pragma omp barrier - // Each thread determines how many edges it will have in it's partition + // Each thread determines how many edges it will have in its partition idx_t myEndVertex = part_offsets[i + 1]; unsigned long long int startEdge; unsigned long long int endEdge; From 87dcc6f797a696d6fa0d68f0557271f27a10484a Mon Sep 17 00:00:00 2001 From: James Wyles Date: Tue, 23 Jul 2019 14:14:47 -0600 Subject: [PATCH 7/8] Removed debugging output --- cpp/src/snmg/COO2CSR/COO2CSR.cu | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/cpp/src/snmg/COO2CSR/COO2CSR.cu b/cpp/src/snmg/COO2CSR/COO2CSR.cu index 4b840ee5cba..8808b3e1286 100644 --- a/cpp/src/snmg/COO2CSR/COO2CSR.cu +++ b/cpp/src/snmg/COO2CSR/COO2CSR.cu @@ -297,28 +297,6 @@ gdf_error snmg_coo2csr_impl(size_t* part_offsets, myRowCount += otherRowCounts[i]; } - std::stringstream ss; - ss << "myRowCount=" << myRowCount << " myEdgeCount=" << myEdgeCount; - serializeMessage(env, ss.str()); - - ss.str(""); - ss << "positions: ["; - for (int j = 0; j < p + 1; j++) - ss << " " << positions[j]; - ss << "]"; - serializeMessage(env, ss.str()); - - ss.str(""); - ss << "part_offsets: ["; - for (int j=0; j < p + 1; j++) - ss << " " << part_offsets[j]; - ss << "]"; - serializeMessage(env, ss.str()); - - ss.str(""); - ss << "localMinId=" << localMinId << " localMaxId=" << localMaxId; - serializeMessage(env, ss.str()); - // Each thread allocates space to receive their rows from others idx_t *cooRowNew, *cooColNew; val_t *cooValNew; From 05c7b4cae7bc641490841d0c3d2e0b151e8119ad Mon Sep 17 00:00:00 2001 From: James Wyles Date: Tue, 23 Jul 2019 14:19:06 -0600 Subject: [PATCH 8/8] Updated change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bda91c14163..fac0b02b841 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ - PR #378 cugraph was unable to import device_of_gpu_pointer - PR #384 Fixed bug in snmg coo2csr causing error in dask-cugraph tests. - PR #382 Disabled vertex id check to allow Azure deployment +- PR #410 Fixed overflow error in SNMG COO2CSR # cuGraph 0.8.0 (27 June 2019)