Add "deferred" argument
Signed-off-by: Vadim Markovtsev <[email protected]>
vmarkovtsev committed Nov 24, 2017
1 parent 77e056e commit 825cd31
Showing 6 changed files with 65 additions and 21 deletions.
12 changes: 10 additions & 2 deletions README.md
@@ -90,7 +90,7 @@ Python API

Import "libMHCUDA".

```python
-def minhash_cuda_init(dim, samples, seed=time(), devices=0, verbosity=0)
+def minhash_cuda_init(dim, samples, seed=time(), deferred=False, devices=0, verbosity=0)
```
Creates the hasher.
@@ -103,6 +103,10 @@ Creates the hasher.

**seed** integer, the random generator seed for reproducible results.

+**deferred** boolean, if True, disables the initialization of WMH parameters with
+random numbers. In that case, the user is expected to call
+minhash_cuda_assign_vars() afterwards (see the sketch below).
+
**devices** integer, bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
3 means using first and second device. Special value 0 enables all available devices.
Default value is 0.
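
A minimal sketch of the deferred workflow documented above (an editor's illustration, not part of the commit; the function names follow the test_deferred test added below, and CUDA device #1 is assumed):

```python
import libMHCUDA
import numpy
from scipy.sparse import csr_matrix

# Donor generator, initialized normally; we only want its WMH parameters.
donor = libMHCUDA.minhash_cuda_init(23, 128, devices=1)
rs, ln_cs, betas = libMHCUDA.minhash_cuda_retrieve_vars(donor)
libMHCUDA.minhash_cuda_fini(donor)

# Deferred generator: the random initialization is skipped, so the
# parameters must be assigned explicitly before hashing.
gen = libMHCUDA.minhash_cuda_init(23, 128, deferred=True, devices=1)
libMHCUDA.minhash_cuda_assign_vars(gen, rs, ln_cs, betas)

weights = csr_matrix(numpy.random.rand(2, 23).astype(numpy.float32))
hashes = libMHCUDA.minhash_cuda_calc(gen, weights)  # shape: (2, 128, 2)
libMHCUDA.minhash_cuda_fini(gen)
```

The same retrieve/assign round-trip is what the new test_deferred test validates against datasketch's WeightedMinHashGenerator.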
@@ -143,7 +147,7 @@ Include "minhashcuda.h".

```C
MinhashCudaGenerator* mhcuda_init(
-    uint32_t dim, uint16_t samples, uint32_t seed,
+    uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
    uint32_t devices, int verbosity, MHCUDAResult *status)
```
Initializes the Weighted MinHash generator.
@@ -156,6 +160,10 @@ Initializes the Weighted MinHash generator.

**seed** the random generator seed for reproducible results.
+**deferred** if set to anything except 0, disables the initialization of WMH parameters with
+random numbers. In that case, the user is expected to call
+mhcuda_assign_random_vars() afterwards.
**devices** bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
3 means using first and second device. Special value 0 enables all available devices.
28 changes: 16 additions & 12 deletions minhashcuda.cc
@@ -159,12 +159,23 @@ class CurandGenerator : public unique_devptr_parent<curandGenerator_st> {
};

static MHCUDAResult mhcuda_init_internal(
-    MinhashCudaGenerator *gen, uint32_t seed, const std::vector<int>& devs) {
+    MinhashCudaGenerator *gen, uint32_t seed, bool deferred,
+    const std::vector<int>& devs) {
  int verbosity = gen->verbosity;
  size_t const_size = gen->dim * gen->samples;
  CUMALLOC(gen->rs, const_size);
  CUMALLOC(gen->ln_cs, const_size);
  CUMALLOC(gen->betas, const_size);
+  FOR_EACH_DEV(
+    cudaDeviceProp props;
+    CUCH(cudaGetDeviceProperties(&props, dev), mhcudaRuntimeError);
+    gen->shmem_sizes.push_back(props.sharedMemPerBlock);
+    DEBUG("GPU #%" PRIu32 " has %d bytes of shared memory per block\n",
+          dev, gen->shmem_sizes.back());
+  );
+  if (deferred) {
+    return mhcudaSuccess;
+  }
  CUCH(cudaSetDevice(devs.back()), mhcudaNoSuchDevice);
  curandGenerator_t rndgen_;
  CURANDCH(curandCreateGenerator(&rndgen_, CURAND_RNG_PSEUDO_DEFAULT),
@@ -193,23 +204,16 @@ static MHCUDAResult mhcuda_init_internal(
    CUP2P(&gen->ln_cs, 0, const_size);
    CUP2P(&gen->betas, 0, const_size);
  );
-  FOR_EACH_DEV(
-    cudaDeviceProp props;
-    CUCH(cudaGetDeviceProperties(&props, dev), mhcudaRuntimeError);
-    gen->shmem_sizes.push_back(props.sharedMemPerBlock);
-    DEBUG("GPU #%" PRIu32 " has %d bytes of shared memory per block\n",
-          dev, gen->shmem_sizes.back());
-  );
  return mhcudaSuccess;
}

extern "C" {

MinhashCudaGenerator *mhcuda_init(
-    uint32_t dim, uint16_t samples, uint32_t seed,
+    uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
    uint32_t devices, int verbosity, MHCUDAResult *status) {
-  DEBUG("mhcuda_init: %" PRIu32 " %" PRIu16 " %" PRIu32 " %" PRIu32
-        " %d %p\n", dim, samples, seed, devices, verbosity, status);
+  DEBUG("mhcuda_init: %" PRIu32 " %" PRIu16 " %" PRIu32 " %d %" PRIu32
+        " %d %p\n", dim, samples, seed, deferred, devices, verbosity, status);
  if (dim == 0 || samples == 0) {
    if (status) *status = mhcudaInvalidArguments;
    return nullptr;
@@ -228,7 +232,7 @@ MinhashCudaGenerator *mhcuda_init(
      return nullptr; \
    } \
  } while(false)
-  CHECK_SUCCESS(mhcuda_init_internal(gen.get(), seed, devs));
+  CHECK_SUCCESS(mhcuda_init_internal(gen.get(), seed, deferred, devs));
  if (verbosity > 1) {
    CHECK_SUCCESS(print_memory_stats(devs));
  }
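Note that the FOR_EACH_DEV scan which queries each GPU's shared memory size moved above the new early return, so even a deferred generator records the per-device limits used later when computing hashes. A quick way to observe this from Python (an editor's sketch, assuming device #1 is present and that verbosity=2 enables the DEBUG output):

```python
import libMHCUDA

# With deferred=True the cuRAND parameter generation is skipped, but the
# device scan still runs, so verbosity=2 should print each GPU's
# shared-memory size before minhash_cuda_init returns.
gen = libMHCUDA.minhash_cuda_init(23, 128, deferred=True, devices=1, verbosity=2)
libMHCUDA.minhash_cuda_fini(gen)
```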
4 changes: 3 additions & 1 deletion minhashcuda.h
@@ -63,14 +63,16 @@ enum MHCUDAResult {
/// but the larger the hash size and the longer to calculate (linear). Must not be prime
/// for performance considerations.
/// @param seed The random generator seed for reproducible results.
+/// @param deferred If not 0, do not initialize the WMH parameters with random numbers.
+///        Instead, expect the user to call mhcuda_assign_random_vars() afterwards.
/// @param devices Bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
/// 3 means using first and second device. Special value 0 enables all available devices.
/// @param verbosity 0 means complete silence, 1 means mere progress logging, 2 means lots of output.
/// @param status The pointer to the reported return code. May be nullptr. In case of any error, the
/// returned result is nullptr and the code is stored into *status (with nullptr check).
/// @return The pointer to the allocated generator opaque struct.
MinhashCudaGenerator* mhcuda_init(
-    uint32_t dim, uint16_t samples, uint32_t seed,
+    uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
    uint32_t devices, int verbosity, MHCUDAResult *status) MALLOC;

/// @brief Extracts the parameters for the specified Weighted MinHash generator.
9 changes: 5 additions & 4 deletions python.cc
@@ -98,21 +98,22 @@ static PyObject *py_minhash_cuda_init(PyObject *self, PyObject *args,
                                      PyObject *kwargs) {
  uint32_t dim, seed = static_cast<uint32_t>(time(NULL)), devices = 0;
  uint16_t samples;
+  int deferred = false;
  int verbosity = 0;
  static const char *kwlist[] = {
-      "dim", "samples", "seed", "devices", "verbosity", NULL
+      "dim", "samples", "seed", "deferred", "devices", "verbosity", NULL
  };

  /* Parse the input tuple */
  if (!PyArg_ParseTupleAndKeywords(
-      args, kwargs, "IH|IIi", const_cast<char**>(kwlist), &dim, &samples,
-      &seed, &devices, &verbosity)) {
+      args, kwargs, "IH|IpIi", const_cast<char**>(kwlist), &dim, &samples,
+      &seed, &deferred, &devices, &verbosity)) {
    return NULL;
  }
  MHCUDAResult result = mhcudaSuccess;
  MinhashCudaGenerator *gen;
  Py_BEGIN_ALLOW_THREADS
-  gen = mhcuda_init(dim, samples, seed, devices, verbosity, &result);
+  gen = mhcuda_init(dim, samples, seed, deferred, devices, verbosity, &result);
  Py_END_ALLOW_THREADS
  switch (result) {
    case mhcudaInvalidArguments:
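The format string change from "IH|IIi" to "IH|IpIi" is what exposes deferred as a keyword argument: the "p" converter (available since CPython 3.3) applies the standard truth test to the value and stores 0 or 1 into the C int. Any truthy value therefore works; an editor's sketch:

```python
import libMHCUDA

# Equivalent spellings: the "p" converter truth-tests the argument and
# passes 0/1 down to mhcuda_init's int deferred parameter.
gen = libMHCUDA.minhash_cuda_init(23, 128, deferred=True, devices=1)
libMHCUDA.minhash_cuda_fini(gen)
gen = libMHCUDA.minhash_cuda_init(23, 128, deferred=1, devices=1)
libMHCUDA.minhash_cuda_fini(gen)
```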
4 changes: 2 additions & 2 deletions setup.py
@@ -46,7 +46,7 @@ def is_pure(self):
setup(
    name="libMHCUDA",
    description="Accelerated Weighted MinHash-ing on GPU",
-    version="1.1.5",
+    version="2.0.0",
    license="MIT",
    author="Vadim Markovtsev",
    author_email="[email protected]",
@@ -57,7 +57,7 @@ def is_pure(self):
    distclass=BinaryDistribution,
    cmdclass={'build_py': CMakeBuild},
    classifiers=[
-        "Development Status :: 4 - Beta",
+        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Operating System :: POSIX :: Linux",
29 changes: 29 additions & 0 deletions test.py
@@ -151,6 +151,35 @@ def test_backwards(self):
            print(hashes)
            raise e from None

+    def test_deferred(self):
+        v1 = [1, 0, 0, 0, 3, 4, 5, 0, 0, 0, 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, 9, 10, 4]
+        v2 = [2, 0, 0, 0, 4, 3, 8, 0, 0, 0, 0, 4, 7, 10, 0, 0, 0, 0, 0, 0, 9, 0, 0]
+        gen = libMHCUDA.minhash_cuda_init(len(v1), 128, devices=1, verbosity=2)
+        vars = libMHCUDA.minhash_cuda_retrieve_vars(gen)
+        libMHCUDA.minhash_cuda_fini(gen)
+        gen = libMHCUDA.minhash_cuda_init(
+            len(v1), 128, devices=1, deferred=True, verbosity=2)
+        libMHCUDA.minhash_cuda_assign_vars(gen, *vars)
+        bgen = WeightedMinHashGenerator.__new__(WeightedMinHashGenerator)
+        bgen.dim = len(v1)
+        bgen.rs, bgen.ln_cs, bgen.betas = vars
+        bgen.sample_size = 128
+        bgen.seed = None
+        m = csr_matrix(numpy.array([v1, v2], dtype=numpy.float32))
+        hashes = libMHCUDA.minhash_cuda_calc(gen, m)
+        libMHCUDA.minhash_cuda_fini(gen)
+        self.assertEqual(hashes.shape, (2, 128, 2))
+        true_hashes = numpy.array([bgen.minhash(v1).hashvalues,
+                                   bgen.minhash(v2).hashvalues], dtype=numpy.uint32)
+        self.assertEqual(true_hashes.shape, (2, 128, 2))
+        try:
+            self.assertTrue((hashes == true_hashes).all())
+        except AssertionError as e:
+            print("---- TRUE ----")
+            print(true_hashes)
+            print("---- FALSE ----")
+            print(hashes)
+            raise e from None

if __name__ == "__main__":
    unittest.main()