From a38ce5d5970cb3a6960f67f24c386a117bc96f49 Mon Sep 17 00:00:00 2001
From: Markus Goetz
Date: Thu, 6 Jun 2019 18:18:37 +0200
Subject: [PATCH 01/24] Provided get and set state functions, reworked seed
 setting, first threefry function (32bit)

---
 heat/core/random.py | 152 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 133 insertions(+), 19 deletions(-)

diff --git a/heat/core/random.py b/heat/core/random.py
index 284f806943..87d47eff5b 100644
--- a/heat/core/random.py
+++ b/heat/core/random.py
@@ -1,32 +1,32 @@
+import time
 import torch
 
 from . import communication
 from . import devices
 from . import dndarray
-from . import types
 from . import stride_tricks
+from . import types
 
+# introduce the variables, will be correctly initialized at the end of file
+__seed = None
+__counter = None
 
-def set_gseed(seed):
-    # TODO: think about proper random number generation
-    # TODO: comment me
-    # TODO: test me
-    torch.manual_seed(seed)
-
-
-def uniform(low=0.0, high=1.0, size=None, device=None, comm=None):
-    # TODO: comment me
-    # TODO: test me
-    # TODO: make me splitable
-    # TODO: add device capabilities
-    if size is None:
-        size = (1,)
 
-    device = devices.sanitize_device(device)
-    comm = communication.sanitize_comm(comm)
-    data = torch.rand(*size, device=device.torch_device) * (high - low) + low
+def get_state():
+    """
+    Return a tuple representing the internal state of the generator.
 
-    return dndarray.DNDarray(data, size, types.float32, None, device, comm)
+    Returns
+    -------
+    out : tuple(str, int, int, int, float)
+        The returned tuple has the following items:
+        1. the string ‘Threefry’,
+        2. the Threefry key value, aka seed,
+        3. the internal counter value,
+        4. an integer has_gauss, always set to 0 (present for compatibility with numpy) and
+        5. a float cached_gaussian, always set to 0.0 (present for compatibility with numpy).
+    """
+    return 'Threefry', __seed, __counter, 0, 0.0


 def randn(*args, split=None, device=None, comm=None):
@@ -86,3 +86,117 @@ def randn(*args, split=None, device=None, comm=None):
     data = torch.randn(args, device=device.torch_device)
 
     return dndarray.DNDarray(data, gshape, types.canonical_heat_type(data.dtype), split, device, comm)
+
+
+def seed(seed=None):
+    """
+    Seed the generator.
+
+    Parameters
+    ----------
+    seed : int, optional
+        Value to seed the algorithm with; if not set, a time-based seed is generated.
+    """
+    if seed is None:
+        seed = communication.MPI_WORLD.bcast(int(time.time() * 256))
+
+    global __seed, __counter
+    __seed = seed
+    __counter = 0
+    torch.manual_seed(seed)
+
+
+def set_state(state):
+    """
+    Set the internal state of the generator from a tuple.
+
+    Parameters
+    ----------
+    state : tuple(str, int, int, int, float)
+        The passed tuple must have the following items:
+        1. the string ‘Threefry’,
+        2. the Threefry key value, aka seed,
+        3. the internal counter value,
+        4. an optional integer has_gauss, ignored (present for compatibility with numpy) and
+        5. an optional float cached_gaussian, ignored (present for compatibility with numpy).
+
+    Raises
+    ------
+    TypeError
+        If an improper state is passed.
+    ValueError
+        If one of the items in the state tuple is of wrong type or value.
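A minimal usage sketch for this state API (illustrative only; it assumes the module is re-exported
as ht.random, as it is elsewhere in heat):

    import heat as ht

    ht.random.seed(12345)                                 # fixed key, counter reset to 0
    algorithm, key, counter, _, _ = ht.random.get_state()
    ht.random.set_state(('Threefry', key, counter))       # the two numpy-compatibility fields may be omitted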
+ """ + if not isinstance(state, tuple) or (len(state) != 3 and len(state) != 5): + raise TypeError('state needs to be a three- or five-tuple') + + if state[0] != 'Threefry': + raise ValueError('algorithm must be "Threefry"') + + global __seed, __counter + __seed = int(state[1]) + __counter = int(state[2]) + + +def __threefry_32(num_samples): + samples = (num_samples + 1) // 2 + + # set up X, i.e. output buffer + X_0 = t.arange(samples, dtype=t.int32) + X_1 = t.arange(samples, dtype=t.int32) + X_0 //= t.iinfo(t.int32).max + + # set up key buffer + ks_0 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 + ks_1 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 + ks_2 = t.full((samples,), 466688986, dtype=t.int32) + ks_2 ^= ks_0 + ks_2 ^= ks_0 + + # initialize output using the key + X_0 += ks_0 + X_1 += ks_1 + + # perform rounds + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 + + # inject key + X_0 += ks_1; X_1 += (ks_2 + 1) + + X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 + X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 + + # inject key + X_0 += ks_2; X_1 += (ks_0 + 2) + + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + + # inject key + X_0 += ks_0; X_1 += (ks_1 + 3) + + +def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): + # TODO: comment me + # TODO: test me + # TODO: make me splitable + # TODO: add device capabilities + if size is None: + size = (1,) + + device = devices.sanitize_device(device) + comm = communication.sanitize_comm(comm) + data = torch.rand(*size, device=device.torch_device) * (high - low) + low + + return dndarray.DNDarray(data, size, types.float32, None, device, comm) + + +# roll a global time-based seed +seed() From 08e583530b84e52f99bd8ba3531df0bd553087ea Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Thu, 13 Jun 2019 11:12:59 +0200 Subject: [PATCH 02/24] Added threefry64, added intxx to floatxx conversion functions --- heat/core/random.py | 140 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 133 insertions(+), 7 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 87d47eff5b..6721688a0e 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -7,7 +7,7 @@ from . import stride_tricks from . import types -# introduce the variables, will be correctly initialized at the end of file +# introduce the global random state variables, will be correctly initialized at the end of file __seed = None __counter = None @@ -29,6 +29,44 @@ def get_state(): return 'Threefry', __seed, __counter, 0, 0.0 +def __int32_to_float32(values): + """ + Converts a tensor of 32-bit (random) numbers to matching single-precision floating point numbers (equally 32-bit) in + the bounded interval [0.0, 1.0). Extracts the 23 least-significant bits of the integers (0x7fffff) and sets them to + be the mantissa of the floating point number. Interval is bound by dividing by 2^23 = 8388608.0. 
+ + Parameters + ---------- + values : torch.Tensor (int32) + Values to be converted to floating points numbers in interval [0.0, 1.0). + + Returns + ------- + floats : torch.Tensor (float32) + Corresponding single-precision floating point numbers. + """ + return (values & 0x7fffff).type(torch.float32) / 8388608.0 + + +def __int64_to_float64(values): + """ + Converts a tensor of 64-bit (random) numbers to matching double-precision floating point numbers (equally 64-bit) in + the bounded interval [0.0, 1.0). Extracts the 53 least-significant bits of the integers (0x1fffffffffffff) and sets + them to be the mantissa of the floating point number. Interval is bound by dividing by 2^53 = 9007199254740992.0. + + Parameters + ---------- + values : torch.Tensor (int64) + Values to be converted to floating points numbers in interval [0.0, 1.0). + + Returns + ------- + floats : torch.Tensor (float64) + Corresponding single-precision floating point numbers. + """ + return (values & 0x1fffffffffffff).type(torch.float64) / 9007199254740992.0 + + def randn(*args, split=None, device=None, comm=None): """ Returns a tensor filled with random numbers from a standard normal distribution with zero mean and variance of one. @@ -139,17 +177,37 @@ def set_state(state): def __threefry_32(num_samples): + """ + Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the + 32-bit version. + + Parameters + ---------- + num_samples : int + Number of 32-bit pseudo random numbers to be generated. + + Returns + ------- + random_numbers : torch.Tensor (int32) + Vector with num_samples pseudo random numbers. + + References + ---------- + [1] Salmon, John K., Moraes, Mark A., Dror, Ron O. and Shaw, David E., "Parallel random numbers: as easy as 1, 2, 3" + Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, + p. 16, 2011 + """ samples = (num_samples + 1) // 2 # set up X, i.e. output buffer - X_0 = t.arange(samples, dtype=t.int32) - X_1 = t.arange(samples, dtype=t.int32) - X_0 //= t.iinfo(t.int32).max + X_0 = torch.arange(samples, dtype=torch.int32) + X_1 = torch.arange(samples, dtype=torch.int32) + X_0 //= torch.iinfo(torch.int32).max # set up key buffer - ks_0 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 - ks_1 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 - ks_2 = t.full((samples,), 466688986, dtype=t.int32) + ks_0 = torch.full((samples,), __seed, dtype=torch.int32) + ks_1 = torch.full((samples,), __seed, dtype=torch.int32) + ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) ks_2 ^= ks_0 ks_2 ^= ks_0 @@ -182,6 +240,74 @@ def __threefry_32(num_samples): # inject key X_0 += ks_0; X_1 += (ks_1 + 3) + return X_0, X_1 + + +def __threefry64(num_samples): + """ + Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the + 64-bit version. + + Parameters + ---------- + num_samples : int + Number of 64-bit pseudo random numbers to be generated. + + Returns + ------- + random_numbers : torch.Tensor (int64) + Vector with num_samples pseudo random numbers. + + References + ---------- + [1] Salmon, John K., Moraes, Mark A., Dror, Ron O. and Shaw, David E., "Parallel random numbers: as easy as 1, 2, 3" + Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, + p. 16, 2011 + """ + samples = (num_samples + 1) // 2 + + # set up X, i.e. 
output buffer + X_0 = torch.arange(samples, dtype=torch.int64) + X_1 = torch.arange(samples, dtype=torch.int64) + X_0 //= torch.iinfo(torch.int64).max + + # set up key buffer + ks_0 = torch.full((samples,), __seed, dtype=torch.int64) + ks_1 = torch.full((samples,), __seed, dtype=torch.int64) + ks_2 = torch.full((samples,), 2004413935125273122, dtype=torch.int64) + ks_2 ^= ks_0 + ks_2 ^= ks_0 + + # initialize output using the key + X_0 += ks_0 + X_1 += ks_1 + + # perform rounds + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 1 + X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 2 + X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 3 + X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 4 + # inject key + X_0 += ks_1; X_1 += (ks_2 + 1) + + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 + X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 + + # inject key + X_0 += ks_2; X_1 += (ks_0 + 2) + + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9 + X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 10 + X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 11 + X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 12 + + # inject key + X_0 += ks_0; X_1 += (ks_1 + 3) + + return X_0, X_1 + def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): # TODO: comment me From 52837a5bf4beb18e6e16620b16badc7ef9262852 Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Mon, 17 Jun 2019 13:46:02 +0200 Subject: [PATCH 03/24] Added float conversion sugar, added Kundu random normal transformation --- heat/core/random.py | 54 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 6721688a0e..9a737cd77f 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -7,11 +7,42 @@ from . import stride_tricks from . import types + # introduce the global random state variables, will be correctly initialized at the end of file __seed = None __counter = None +# float conversion constants +__INT32_TO_FLOAT32 = 1.0 / 8388608.0 +__INT64_TO_FLOAT64 = 1.0 / 9007199254740992.0 +__KUNDU_INVERSE = 1.0 / 0.3807 + + +def __kundu_transform(values): + """ + Transforms uniformly distributed floating point random values in the interval [0.0, 1.0) into normal distributed + floating point random values with mean 0.0 and standard deviation 1.0. The algorithm makes use of the generalized + exponential distribution transformation [1]. + + Parameters + ---------- + values : torch.Tensor + A tensor containing uniformly distributed floating point values in the interval [0.0, 1.0). + + Returns + ------- + normal_values : torch.Tensor + A tensor containing the equivalent normally distributed floating point values with mean of 0.0 and standard + deviation of 1.0. + + References + ---------- + [1] Boiroju, N. K. and Reddy, K. M., "Generation of Standard Normal Random Numbers", Interstat, vol 5., 2012. + """ + return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE + + def get_state(): """ Return a tuple representing the internal state of the generator. @@ -45,7 +76,7 @@ def __int32_to_float32(values): floats : torch.Tensor (float32) Corresponding single-precision floating point numbers. 
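The __kundu_transform introduced in this patch can be checked empirically. A standalone sketch using
the same constants (the transform is an approximation, so the moments only come out close to 0 and 1):

    import torch

    u = torch.rand(1_000_000, dtype=torch.float64)
    z = (torch.log(-torch.log(1 - u ** 0.0775)) - 1.0821) * (1.0 / 0.3807)
    print(z.mean().item(), z.std().item())  # both should land near 0 and 1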
""" - return (values & 0x7fffff).type(torch.float32) / 8388608.0 + return (values & 0x7fffff).type(torch.float32) * __INT32_TO_FLOAT32 def __int64_to_float64(values): @@ -64,7 +95,7 @@ def __int64_to_float64(values): floats : torch.Tensor (float64) Corresponding single-precision floating point numbers. """ - return (values & 0x1fffffffffffff).type(torch.float64) / 9007199254740992.0 + return (values & 0x1fffffffffffff).type(torch.float64) * __INT64_TO_FLOAT64 def randn(*args, split=None, device=None, comm=None): @@ -101,6 +132,9 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ + # TODO: FIX ME! + return + # TODO: make me splitable # TODO: add device capabilities # check if all positional arguments are integers @@ -169,7 +203,7 @@ def set_state(state): raise TypeError('state needs to be a three- or five-tuple') if state[0] != 'Threefry': - raise ValueError('algorithm must be "Threefry"') + raise ValueError('algorithm must be \'Threefry\'') global __seed, __counter __seed = int(state[1]) @@ -188,8 +222,8 @@ def __threefry_32(num_samples): Returns ------- - random_numbers : torch.Tensor (int32) - Vector with num_samples pseudo random numbers. + random_numbers : tuple(torch.Tensor (int32)) + Two vectors with num_samples / 2 (rounded-up) pseudo random numbers. References ---------- @@ -200,8 +234,8 @@ def __threefry_32(num_samples): samples = (num_samples + 1) // 2 # set up X, i.e. output buffer - X_0 = torch.arange(samples, dtype=torch.int32) - X_1 = torch.arange(samples, dtype=torch.int32) + X_0 = torch.arange(samples, dtype=torch.int32) + (__counter | 0xffffffff) + X_1 = torch.arange(samples, dtype=torch.int32) + (__counter >> 32) X_0 //= torch.iinfo(torch.int32).max # set up key buffer @@ -255,8 +289,8 @@ def __threefry64(num_samples): Returns ------- - random_numbers : torch.Tensor (int64) - Vector with num_samples pseudo random numbers. + random_numbers : tuple(torch.Tensor (int64)) + Two vectors with num_samples / 2 (rounded-up) pseudo random numbers. References ---------- @@ -310,6 +344,8 @@ def __threefry64(num_samples): def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): + # TODO: FIX ME! + # TODO: comment me # TODO: test me # TODO: make me splitable From b0a2a90ee59afa6d14e797ab8d86cf704470d947 Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Thu, 4 Jul 2019 17:33:05 +0200 Subject: [PATCH 04/24] Broken inbetween state, nothing working yet, but would like to backup changes made --- heat/core/random.py | 268 +++++++++++++++++++++++++++++++------------- 1 file changed, 193 insertions(+), 75 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 9a737cd77f..b23b8a1834 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -1,3 +1,4 @@ +import numpy as np import time import torch @@ -19,28 +20,43 @@ __KUNDU_INVERSE = 1.0 / 0.3807 -def __kundu_transform(values): +def __counter_sequence(shape, dtype, split, device, comm): """ - Transforms uniformly distributed floating point random values in the interval [0.0, 1.0) into normal distributed - floating point random values with mean 0.0 and standard deviation 1.0. The algorithm makes use of the generalized - exponential distribution transformation [1]. Parameters ---------- - values : torch.Tensor - A tensor containing uniformly distributed floating point values in the interval [0.0, 1.0). 
+ shape + dtype + split + device + comm Returns ------- - normal_values : torch.Tensor - A tensor containing the equivalent normally distributed floating point values with mean of 0.0 and standard - deviation of 1.0. - References - ---------- - [1] Boiroju, N. K. and Reddy, K. M., "Generation of Standard Normal Random Numbers", Interstat, vol 5., 2012. """ - return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE + global __counter + + total_elements = np.prod(shape) + + + dimensions = len(shape) + elements_in_higher_dims = 1 + offset, lshape, _ = comm.chunk(shape, split) + ranges = dimensions * [None] + + for i in range(dimensions - 2, -1, -1): + elements_in_dim = lshape[i] + if i != split: + values = torch.arange(elements_in_dim, dtype=dtype, device=device) * elements_in_higher_dims + else: + values = (torch.arange(elements_in_dim, dtype=dtype, device=device) + offset) * elements_in_higher_dims + + values = values.reshape(*[1 if j != i else -1 for j in range(dimensions)]) + ranges[i] = values + elements_in_higher_dims *= elements_in_dim + + return torch.sum(ranges) def get_state(): @@ -98,16 +114,161 @@ def __int64_to_float64(values): return (values & 0x1fffffffffffff).type(torch.float64) * __INT64_TO_FLOAT64 +def __kundu_transform(values): + """ + Transforms uniformly distributed floating point random values in the interval [0.0, 1.0) into normal distributed + floating point random values with mean 0.0 and standard deviation 1.0. The algorithm makes use of the generalized + exponential distribution transformation [1]. + + Parameters + ---------- + values : torch.Tensor + A tensor containing uniformly distributed floating point values in the interval [0.0, 1.0). + + Returns + ------- + normal_values : torch.Tensor + A tensor containing the equivalent normally distributed floating point values with mean of 0.0 and standard + deviation of 1.0. + + References + ---------- + [1] Boiroju, N. K. and Reddy, K. M., "Generation of Standard Normal Random Numbers", Interstat, vol 5., 2012. + """ + return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE + + +def rand(*args, split=None, device=None, comm=None): + """ + Random values in a given shape. + + Create a tensor of the given shape and populate it with random samples from a uniform distribution over [0, 1). + + Parameters + ---------- + d0, d1, …, dn : int, optional + The dimensions of the returned array, should all be positive. If no argument is given a single random samples is + generated. + split: int, optional + The axis along which the array is split and distributed, defaults to None (no distribution). + device : str or None, optional + Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device). + comm: Communication, optional + Handle to the nodes holding distributed parts or copies of this tensor. + + Returns + ------- + out : ndarray, shape (d0, d1, ..., dn) + The uniformly distributed [0.0, 1.0)-bound random values. 
+ """ + # if args are not set, generate a single sample + if not args: + args = (1,) + + # ensure that the passed dimensions are positive integer-likes + shape = tuple(int(ele) for ele in args) + if not all(ele > 0 for ele in shape): + raise ValueError('negative dimensions are not allowed') + + # make sure the remaining parameters are of proper type + split = stride_tricks.sanitize_axis(shape, split) + device = devices.sanitize_device(device) + comm = communication.sanitize_comm(comm) + + # generate the random sequence + x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1 = __threefry64(x_0, x_1) + + # combine the values into one tensor and convert them to floats + values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] + values = __int64_to_float64(values) + + return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + + +def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm=None): + """ + Random values in a given shape. + + Create a tensor of the given shape and populate it with random samples from a uniform distribution over [0, 1). + + Parameters + ---------- + low : int + Lowest (signed) integer to be drawn from the distribution (unless high=None, in which case this parameter is one + above the highest such integer). + high : int, optional + If provided, one above the largest (signed) integer to be drawn from the distribution (see above for behavior if high=None). + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. Default is None, in + which case a single value is returned. + dtype : dtype, optional + Desired dtype of the result. Must be an integer type. Defaults to ht.int64. + split: int, optional + The axis along which the array is split and distributed, defaults to None (no distribution). + device : str or None, optional + Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device). + comm: Communication, optional + Handle to the nodes holding distributed parts or copies of this tensor. + + Returns + ------- + out : ndarray, shape (d0, d1, ..., dn) + The uniformly distributed [0.0, 1.0)-bound random values. 
+ """ + # determine range bounds + if high is None: + low, high = 0, int(low) + else: + low, high = int(low), int(high) + if low >= high: + raise ValueError('low >= high') + span = high - low + 1 + + # sanitize shape + if size is None: + size = (1,) + shape = tuple(int(ele) for ele in size) + if not all(ele > 0 for ele in shape): + raise ValueError('negative dimensions are not allowed') + + # sanitize the data type + if dtype is None: + dtype = types.int64 + dtype = types.canonical_heat_type(dtype) + if dtype is not types.int64 and dtype is not types.int32: + raise ValueError('Unsupported dtype for randint') + + # make sure the remaining parameters are of proper type + split = stride_tricks.sanitize_axis(shape, split) + device = devices.sanitize_device(device) + comm = communication.sanitize_comm(comm) + + # generate the random sequence + x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1 = __threefry64(x_0, x_1) + + # combine the values into one tensor and convert them to floats + values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] + values = __int64_to_float64(values) + + return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + + def randn(*args, split=None, device=None, comm=None): """ Returns a tensor filled with random numbers from a standard normal distribution with zero mean and variance of one. - The shape of the tensor is defined by the varargs args. - Parameters ---------- d0, d1, …, dn : int, optional The dimensions of the returned array, should be all positive. + split: int, optional + The axis along which the array is split and distributed, defaults to None (no distribution). + device : str or None, optional + Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device). + comm: Communication, optional + Handle to the nodes holding distributed parts or copies of this tensor. Returns ------- @@ -132,32 +293,12 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ - # TODO: FIX ME! - return - - # TODO: make me splitable - # TODO: add device capabilities - # check if all positional arguments are integers - if not all(isinstance(_, int) for _ in args): - raise TypeError('dimensions have to be integers') - if not all(_ > 0 for _ in args): - raise ValueError('negative dimension are not allowed') - - gshape = tuple(args) if args else(1,) - split = stride_tricks.sanitize_axis(gshape, split) - - try: - torch.randn(gshape) - except RuntimeError as exception: - # re-raise the exception to be consistent with numpy's exception interface - raise ValueError(str(exception)) - - # compose the local tensor - device = devices.sanitize_device(device) - comm = communication.sanitize_comm(comm) - data = torch.randn(args, device=device.torch_device) + # generate uniformly distributed random numbers first + normal_tensor = rand(*args, split, device, comm) + # convert the the values to a normal distribution using the kundu transform + normal_tensor._DNDarray__array = __kundu_transform(normal_tensor._DNDarray__array) - return dndarray.DNDarray(data, gshape, types.canonical_heat_type(data.dtype), split, device, comm) + return normal_tensor def seed(seed=None): @@ -210,15 +351,17 @@ def set_state(state): __counter = int(state[2]) -def __threefry_32(num_samples): +def __threefry_32(X_0, X_1): """ Counter-based pseudo random number generator. 
Based on a 12-round Threefry "encryption" algorithm [1]. This is the 32-bit version. Parameters ---------- - num_samples : int - Number of 32-bit pseudo random numbers to be generated. + X_0 : torch.Tensor + Upper bits of the to be encoded random sequence + X_1 : torch.Tensor + Lower bits of the to be encoded random sequence Returns ------- @@ -231,12 +374,7 @@ def __threefry_32(num_samples): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 16, 2011 """ - samples = (num_samples + 1) // 2 - - # set up X, i.e. output buffer - X_0 = torch.arange(samples, dtype=torch.int32) + (__counter | 0xffffffff) - X_1 = torch.arange(samples, dtype=torch.int32) + (__counter >> 32) - X_0 //= torch.iinfo(torch.int32).max + samples = len(X_0) # set up key buffer ks_0 = torch.full((samples,), __seed, dtype=torch.int32) @@ -277,15 +415,17 @@ def __threefry_32(num_samples): return X_0, X_1 -def __threefry64(num_samples): +def __threefry64(X_0, X_1): """ Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the 64-bit version. Parameters ---------- - num_samples : int - Number of 64-bit pseudo random numbers to be generated. + X_0 : torch.Tensor + Upper bits of the to be encoded random sequence + X_1 : torch.Tensor + Lower bits of the to be encoded random sequence Returns ------- @@ -298,12 +438,7 @@ def __threefry64(num_samples): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 16, 2011 """ - samples = (num_samples + 1) // 2 - - # set up X, i.e. output buffer - X_0 = torch.arange(samples, dtype=torch.int64) - X_1 = torch.arange(samples, dtype=torch.int64) - X_0 //= torch.iinfo(torch.int64).max + samples = len(X_0) # set up key buffer ks_0 = torch.full((samples,), __seed, dtype=torch.int64) @@ -343,22 +478,5 @@ def __threefry64(num_samples): return X_0, X_1 -def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): - # TODO: FIX ME! - - # TODO: comment me - # TODO: test me - # TODO: make me splitable - # TODO: add device capabilities - if size is None: - size = (1,) - - device = devices.sanitize_device(device) - comm = communication.sanitize_comm(comm) - data = torch.rand(*size, device=device.torch_device) * (high - low) + low - - return dndarray.DNDarray(data, size, types.float32, None, device, comm) - - # roll a global time-based seed seed() From 4e593f838455e237480f7bba07bd2c16fa257dca Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Wed, 14 Aug 2019 10:29:47 +0200 Subject: [PATCH 05/24] Simon taking over --- heat/core/random.py | 77 +++++++++++++++++++++++++--------- heat/core/tests/test_random.py | 26 +++--------- 2 files changed, 64 insertions(+), 39 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index b23b8a1834..fbfbd37ef5 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -22,27 +22,59 @@ def __counter_sequence(shape, dtype, split, device, comm): """ + Generates a sequence of numbers to be used as the "clear text" for the threefry encryption, i.e. the pseudo random + number generator. Due to the fact that threefry always requires pairs of inputs, the input sequence may not just be + a simple range including the global offset, but rather needs to be to independent vectors, one containing the range + and the other having the interleaved high-bits counter in it. 
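A standalone sketch of the word split this function relies on, for the 64-bit case (the exact mask
handling is refined over the following patches):

    max_count = 0xffffffffffffffff                 # 2 ** 64 - 1
    counter = 0x00000000000000010000000000000003   # example 128-bit generator state
    c_0 = (counter & (max_count << 64)) >> 64      # high word: 1
    c_1 = counter & max_count                      # low word:  3
    # c_1 offsets the per-element range (x_1), c_0 fills the companion high-bits vector (x_0)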
Parameters ---------- - shape - dtype - split - device - comm + shape : tuple of ints + The global shape of the random tensor to be generated. + dtype : torch.dtype + The data type of the elements to be generated. Needs to be either torch.int32 or torch.int64. + split : int or None + The split axis along which the random number tensor is split + device : 'str' + Specifies the device the tensor shall be allocated on. + comm: ht.Communication + Handle to the nodes holding distributed parts or copies of this tensor. Returns ------- - + x_0 : torch.Tensor + The high-bits vector for the threefry encryption. + x_1 : torch.Tensor + The low-bits vector for the threefry encryption. + lshape : tuple of ints + The shape x_0 and x_1 need to be reshaped to after encryption. May be slightly larger than the actual local + portion of the random number tensor due to sequence overlaps of the counter sequence. + slices : list of slices + The indices into the reshaped tensor to obtain the actual local portion. """ + # get the global random state into the function, might want to factor this out into a class later global __counter + # extract the counter state of the random number generator + if dtype is torch.int32: + c_0 = __counter & (0xffffffff << 32) + c_1 = __counter & 0xffffffff + else: # torch.int64 + c_0 = __counter & (0xffffffffffffffff << 64) + c_1 = __counter & 0xffffffffffffffff + + # prepare some reusable values + dimensions = len(shape) total_elements = np.prod(shape) + offset, lshape, _ = comm.chunk(shape, split) + + # generate the x_0 counter sequence + x_0 = torch.full + + # generate the x_1 counter sequence - dimensions = len(shape) elements_in_higher_dims = 1 - offset, lshape, _ = comm.chunk(shape, split) ranges = dimensions * [None] for i in range(dimensions - 2, -1, -1): @@ -56,7 +88,10 @@ def __counter_sequence(shape, dtype, split, device, comm): ranges[i] = values elements_in_higher_dims *= elements_in_dim - return torch.sum(ranges) + # advance the global counter + __counter += total_elements + + return x_0, x_1, lshape, slices def get_state(): @@ -176,12 +211,11 @@ def rand(*args, split=None, device=None, comm=None): comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1, lshape = __counter_sequence(shape, torch.int64, split, device, comm) x_0, x_1 = __threefry64(x_0, x_1) # combine the values into one tensor and convert them to floats - values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] - values = __int64_to_float64(values) + values = __int64_to_float64(torch.stack([x_0, x_1], dim=1)).reshape(lshape) return dndarray.DNDarray(values, shape, types.float64, split, device, comm) @@ -238,6 +272,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm dtype = types.canonical_heat_type(dtype) if dtype is not types.int64 and dtype is not types.int32: raise ValueError('Unsupported dtype for randint') + torch_dtype = dtype.torch_type() # make sure the remaining parameters are of proper type split = stride_tricks.sanitize_axis(shape, split) @@ -245,14 +280,18 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) - x_0, x_1 = __threefry64(x_0, x_1) + x_0, x_1, lshape = __counter_sequence(shape, torch_dtype, split, device, comm) + if 
torch_dtype is torch.int32: + x_0, x_1 = __threefry32(x_0, x_1) + else: + x_0, x_1 = __threefry64(x_0, x_1) - # combine the values into one tensor and convert them to floats - values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] - values = __int64_to_float64(values) + # stack the resulting sequence and normalize to given range + values = torch.stack([x_0, x_1], dim=1).reshape(lshape) + # ATTENTION: this is biased and known, bias-free rejection sampling is difficult to do in parallel + values = (values.abs_() % span) + low - return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + return dndarray.DNDarray(values, shape, dtype, split, device, comm) def randn(*args, split=None, device=None, comm=None): @@ -351,7 +390,7 @@ def set_state(state): __counter = int(state[2]) -def __threefry_32(X_0, X_1): +def __threefry32(X_0, X_1): """ Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the 32-bit version. diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index ce93daa8ea..bb03418e4a 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -4,25 +4,11 @@ class TestTensor(unittest.TestCase): - def test_randn(self): - # scalar input - simple_randn_float = ht.random.randn(3) - self.assertIsInstance(simple_randn_float, ht.DNDarray) - self.assertEqual(simple_randn_float.shape, (3,)) - self.assertEqual(simple_randn_float.lshape, (3,)) - self.assertEqual(simple_randn_float.split, None) - self.assertEqual(simple_randn_float.dtype, ht.float32) + def test_rand(self): + pass - # multi-dimensional - elaborate_randn_float = ht.random.randn(2, 3) - self.assertIsInstance(elaborate_randn_float, ht.DNDarray) - self.assertEqual(elaborate_randn_float.shape, (2, 3)) - self.assertEqual(elaborate_randn_float.lshape, (2, 3)) - self.assertEqual(elaborate_randn_float.split, None) - self.assertEqual(elaborate_randn_float.dtype, ht.float32) + def test_randint(self): + pass - # exceptions - with self.assertRaises(TypeError): - ht.random.randn('(2, 3,)') - with self.assertRaises(ValueError): - ht.random.randn(-1, 3) + def test_randn(self): + pass From 8a87c4f1a0040caf3dd24e699afc9fde896e6e9f Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 26 Aug 2019 17:42:51 +0200 Subject: [PATCH 06/24] Implemented the counter_sequenze function and added multiple test cases --- heat/core/random.py | 125 +++++++++++++++++++++++++-------- heat/core/tests/test_random.py | 76 +++++++++++++++++++- 2 files changed, 169 insertions(+), 32 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index fbfbd37ef5..07f28c3c03 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -49,49 +49,112 @@ def __counter_sequence(shape, dtype, split, device, comm): lshape : tuple of ints The shape x_0 and x_1 need to be reshaped to after encryption. May be slightly larger than the actual local portion of the random number tensor due to sequence overlaps of the counter sequence. - slices : list of slices - The indices into the reshaped tensor to obtain the actual local portion. 
+ slice : python slice + The slice that needs to be applied to the resulting random number tensor """ # get the global random state into the function, might want to factor this out into a class later global __counter - + tmp_counter = __counter # Share this initial local state to update it correctly later + rank = comm.Get_rank() + size = comm.Get_size() + max_count = 0xffffffff if dtype == torch.int32 else 0xffffffffffffffff # extract the counter state of the random number generator if dtype is torch.int32: - c_0 = __counter & (0xffffffff << 32) - c_1 = __counter & 0xffffffff + c_0 = (__counter & (max_count << 32)) >> 32 + c_1 = __counter & max_count else: # torch.int64 - c_0 = __counter & (0xffffffffffffffff << 64) - c_1 = __counter & 0xffffffffffffffff + c_0 = (__counter & (max_count << 64)) >> 64 + c_1 = __counter & max_count - # prepare some reusable values - dimensions = len(shape) total_elements = np.prod(shape) - offset, lshape, _ = comm.chunk(shape, split) + if total_elements > 2 * max_count: + raise ValueError('Shape is to big with {} elements'.format(total_elements)) + + if split is None: + values = total_elements / 2 + even_end = values % 2 == 0 + lslice = slice(None) if even_end else slice(None, -1) + start = c_1 + end = start + int(values) + lshape = shape + else: + offset, lshape, _ = comm.chunk(shape, split) + counts, displs, _ = comm.counts_displs_shape(shape, split) + + # Calculate number of local elements per process + local_elements = [total_elements / shape[split] * counts[i] for i in range(size)] + cum_elements = np.cumsum(local_elements) + + # Calculate the correct borders and slices + even_start = True if rank == 0 else cum_elements[rank-1] % 2 == 0 + start = c_1 if rank == 0 else int(cum_elements[rank-1] / 2) + c_1 + elements = local_elements[rank] / 2 + lslice = slice(None) + if even_start: + # No overlap with previous processes + if elements == int(elements): + # Even number of elements + end = int(elements) + else: + # Odd number of elements + end = int(elements) + 1 + lslice = slice(None, -1) + else: + # Overlap with previous processes + if elements == int(elements): + # Even number of elements + end = int(elements) + 1 + lslice = slice(1, -1) + else: + # Odd number of elements + end = int(elements) + 1 + lslice = slice(1, None) + start = int(start) + end += start + + # Check x_1 for overflow + lrange = [start, end] + signed_mask = 0x7fffffff if dtype == torch.int32 else 0x7fffffffffffffff + diff = 0 if lrange[1] <= signed_mask else lrange[1] - signed_mask + lrange[0], lrange[1] = lrange[0] - diff, lrange[1] - diff + + # create x_1 counter sequence + x_1 = torch.arange(*lrange, dtype=dtype) + while diff > signed_mask: + # signed_mask is maximum that can be added at a time because torch does not support unit64 or unit32 + x_1 += signed_mask + diff -= signed_mask + x_1 += diff # generate the x_0 counter sequence - x_0 = torch.full - - # generate the x_1 counter sequence - - - elements_in_higher_dims = 1 - ranges = dimensions * [None] + x_0 = torch.empty_like(x_1) + diff = c_0 - signed_mask + if diff > 0: + # same problem as for x_1 with the overflow + x_0.fill_(signed_mask) + while diff > signed_mask: + x_0 += signed_mask + diff -= signed_mask + x_0 += diff + else: + x_0.fill_(c_0) - for i in range(dimensions - 2, -1, -1): - elements_in_dim = lshape[i] - if i != split: - values = torch.arange(elements_in_dim, dtype=dtype, device=device) * elements_in_higher_dims + # Detect if x_0 needs to be increased for current values + if end > max_count: + if start > 
max_count: + # x_0 changed in previous process, increase all values + x_0 += 1 else: - values = (torch.arange(elements_in_dim, dtype=dtype, device=device) + offset) * elements_in_higher_dims - - values = values.reshape(*[1 if j != i else -1 for j in range(dimensions)]) - ranges[i] = values - elements_in_higher_dims *= elements_in_dim + # x_0 changes after reaching the overflow in this process + x_0[-(end-max_count-1):] += 1 - # advance the global counter - __counter += total_elements + # Correctly increase the counter variable + used_values = int(np.ceil(total_elements / 2)) + # Increase counter but not over 128 bit + tmp_counter += used_values & 0xffffffffffffffffffffffffffffffff # 128bit mask + __counter = tmp_counter - return x_0, x_1, lshape, slices + return x_0, x_1, lshape, lslice def get_state(): @@ -211,11 +274,11 @@ def rand(*args, split=None, device=None, comm=None): comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, lshape = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) x_0, x_1 = __threefry64(x_0, x_1) # combine the values into one tensor and convert them to floats - values = __int64_to_float64(torch.stack([x_0, x_1], dim=1)).reshape(lshape) + values = __int64_to_float64(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) return dndarray.DNDarray(values, shape, types.float64, split, device, comm) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index bb03418e4a..68ebbfe593 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,11 +1,85 @@ import unittest import heat as ht +import numpy as np class TestTensor(unittest.TestCase): def test_rand(self): - pass + # int64 tests + + # Resetting seed works + seed = 12345 + ht.random.seed(seed) + a = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + b = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + self.assertFalse(ht.equal(a, b)) + ht.random.seed(seed) + c = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + self.assertTrue(ht.equal(a, c)) + + # Random numbers with overflow + ht.random.set_state(('Threefry', seed, 0xfffffffffffffff0)) + a = ht.random.rand(2, 3, 4, 5, split=0, comm=ht.MPI_WORLD) + ht.random.set_state(('Threefry', seed, 0x10000000000000000)) + b = ht.random.rand(2, 44, split=0, comm=ht.MPI_WORLD) + a = a.numpy().flatten() + b = b.numpy().flatten() + self.assertTrue(np.array_equal(a[32:], b)) + + # Check that random numbers don't repeat after first overflow + seed = 12345 + ht.random.set_state(('Threefry', seed, 0x10000000000000000)) + a = ht.random.rand(2, 44) + ht.random.seed(seed) + b = ht.random.rand(2, 44) + self.assertFalse(ht.equal(a, b)) + + # Check that we start from beginning after 128 bit overflow + ht.random.seed(seed) + a = ht.random.rand(2, 34, split=0) + ht.random.set_state(('Threefry', seed, 0xfffffffffffffffffffffffffffffff0)) + b = ht.random.rand(2, 50, split=0) + a = a.numpy().flatten() + b = b.numpy(). 
flatten() + self.assertTrue(np.array_equal(a, b[32:])) + + # different split axis with resetting seed + ht.random.seed(seed) + a = ht.random.rand(3, 5, 2, 9, split=3, comm=ht.MPI_WORLD) + ht.random.seed(seed) + c = ht.random.rand(3, 5, 2, 9, split=3, comm=ht.MPI_WORLD) + self.assertTrue(ht.equal(a, c)) + + # Random values are in correct order + ht.random.seed(seed) + a = ht.random.rand(2, 50, split=0) + ht.random.seed(seed) + b = ht.random.rand(100, split=None) + a = a.numpy().flatten() + b = b._DNDarray__array.numpy() + self.assertTrue(np.array_equal(a, b)) + + # On different shape and split the same random values are used + ht.random.seed(seed) + a = ht.random.rand(3, 5, 2, 9, split=3, comm=ht.MPI_WORLD) + ht.random.seed(seed) + b = ht.random.rand(30, 9, split=1, comm=ht.MPI_WORLD) + a = np.sort(a.numpy().flatten()) + b = np.sort(b.numpy().flatten()) + self.assertTrue(np.array_equal(a, b)) + + # One large array does not have two similar values + a = ht.random.rand(11, 15, 3, 7, split=2, comm=ht.MPI_WORLD) + a = a.numpy() + _, counts = np.unique(a, return_counts=True) + self.assertTrue((counts == 1).all()) # Assert that no value appears more than once + + # Two large arrays that were created after each other don't share any values + b = ht.random.rand(14, 7, 3, 12, 18, 42, split=5, comm=ht.MPI_WORLD) + c = np.concatenate((a.flatten(), b.numpy().flatten())) + _, counts = np.unique(c, return_counts=True) + self.assertTrue((counts == 1).all()) def test_randint(self): pass From 3d46337211ca7846df9ddfc932e36caec4ab0cab Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 29 Aug 2019 13:52:38 +0200 Subject: [PATCH 07/24] fixing unit test that broke down because of new random generator --- heat/core/random.py | 8 ++-- heat/core/statistics.py | 9 +++-- heat/core/tests/test_communication.py | 27 +++++++------ heat/core/tests/test_manipulations.py | 16 ++++---- heat/core/tests/test_statistics.py | 56 +++++++++++++-------------- 5 files changed, 59 insertions(+), 57 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 07f28c3c03..9dbab52e67 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -71,8 +71,8 @@ def __counter_sequence(shape, dtype, split, device, comm): raise ValueError('Shape is to big with {} elements'.format(total_elements)) if split is None: - values = total_elements / 2 - even_end = values % 2 == 0 + values = int(total_elements / 2) + even_end = total_elements % 2 == 0 lslice = slice(None) if even_end else slice(None, -1) start = c_1 end = start + int(values) @@ -259,6 +259,7 @@ def rand(*args, split=None, device=None, comm=None): out : ndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. 
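The even_end change in the hunk above fixes a real off-by-one: whether the trailing value has to be
sliced off depends on the parity of the total element count, not on the pair count. A quick illustration:

    total_elements = 6
    values = total_elements / 2     # 3.0 Threefry pairs
    (values % 2) == 0               # False -> the old check would wrongly slice off a needed value
    (total_elements % 2) == 0       # True  -> the trailing value must be kept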
""" + print('args', args) # if args are not set, generate a single sample if not args: args = (1,) @@ -395,8 +396,9 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ + print('args', args) # generate uniformly distributed random numbers first - normal_tensor = rand(*args, split, device, comm) + normal_tensor = rand(*args, split=split, device=device, comm=comm) # convert the the values to a normal distribution using the kundu transform normal_tensor._DNDarray__array = __kundu_transform(normal_tensor._DNDarray__array) diff --git a/heat/core/statistics.py b/heat/core/statistics.py index ba3a214489..ae0d70b2a3 100644 --- a/heat/core/statistics.py +++ b/heat/core/statistics.py @@ -390,6 +390,7 @@ def max(x, axis=None, out=None, keepdim=None): [12.]]) """ def local_max(*args, **kwargs): + print('args', *args) result = torch.max(*args, **kwargs) if isinstance(result, tuple): return result[0] @@ -496,14 +497,14 @@ def maximum(x1, x2, out=None): # locally: apply torch.max(x1, x2) output_lshape = stride_tricks.broadcast_shape(x1.lshape, x2.lshape) - lresult = factories.empty(output_lshape) + lresult = factories.empty(output_lshape, dtype=x1.dtype) lresult._DNDarray__array = torch.max(x1._DNDarray__array, x2._DNDarray__array) lresult._DNDarray__dtype = types.promote_types(x1.dtype, x2.dtype) lresult._DNDarray__split = split if x1.split is not None or x2.split is not None: if x1.comm.is_distributed(): # assuming x1.comm = x2.comm output_gshape = stride_tricks.broadcast_shape(x1.gshape, x2.gshape) - result = factories.empty(output_gshape) + result = factories.empty(output_gshape, dtype=x1.dtype) x1.comm.Allgather(lresult, result) # TODO: adopt Allgatherv() as soon as it is fixed, Issue #233 result._DNDarray__dtype = lresult._DNDarray__dtype @@ -912,14 +913,14 @@ def minimum(x1, x2, out=None): # locally: apply torch.min(x1, x2) output_lshape = stride_tricks.broadcast_shape(x1.lshape, x2.lshape) - lresult = factories.empty(output_lshape) + lresult = factories.empty(output_lshape, dtype=x1.dtype) lresult._DNDarray__array = torch.min(x1._DNDarray__array, x2._DNDarray__array) lresult._DNDarray__dtype = types.promote_types(x1.dtype, x2.dtype) lresult._DNDarray__split = split if x1.split is not None or x2.split is not None: if x1.comm.is_distributed(): # assuming x1.comm = x2.comm output_gshape = stride_tricks.broadcast_shape(x1.gshape, x2.gshape) - result = factories.empty(output_gshape) + result = factories.empty(output_gshape, dtype=x1.dtype) x1.comm.Allgather(lresult, result) # TODO: adopt Allgatherv() as soon as it is fixed, Issue #233 result._DNDarray__dtype = lresult._DNDarray__dtype diff --git a/heat/core/tests/test_communication.py b/heat/core/tests/test_communication.py index f7d85f44dd..118094c1db 100644 --- a/heat/core/tests/test_communication.py +++ b/heat/core/tests/test_communication.py @@ -181,10 +181,10 @@ def test_allgather(self): # check result self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) - self.assertTrue((output._DNDarray__array == torch.ones(ht.MPI_WORLD.size, 7,)).all()) + self.assertTrue((output._DNDarray__array == torch.ones(ht.MPI_WORLD.size, 7)).all()) # contiguous data, different gather axis - data = ht.ones((7, 2,)) + data = ht.ones((7, 2,), dtype=ht.float64) output = ht.random.randn(7, 2 * ht.MPI_WORLD.size) # ensure prior invariants @@ -195,7 +195,7 @@ def test_allgather(self): # check result 
self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) - self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size)).all()) + self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size, dtype=torch.float64)).all()) # non-contiguous data data = ht.ones((4, 5,)).T @@ -825,7 +825,7 @@ def test_iallgather(self): self.assertTrue((output._DNDarray__array == torch.ones(ht.MPI_WORLD.size, 7,)).all()) # contiguous data, different gather axis - data = ht.ones((7, 2,)) + data = ht.ones((7, 2,), dtype=ht.float64) output = ht.random.randn(7, 2 * ht.MPI_WORLD.size) # ensure prior invariants @@ -833,11 +833,10 @@ def test_iallgather(self): self.assertTrue(output._DNDarray__array.is_contiguous()) req = data.comm.Iallgather(data, output, send_axis=1) req.wait() - # check scatter result self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) - self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size)).all()) + self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size, dtype=torch.float64)).all()) # non-contiguous data data = ht.ones((4, 5,)).T @@ -1288,7 +1287,7 @@ def test_iexscan(self): def test_igather(self): try: # contiguous data - data = ht.ones((1, 5,), dtype=ht.float32) + data = ht.ones((1, 5,), dtype=ht.float64) output = ht.random.randn(ht.MPI_WORLD.size, 5) # ensure prior invariants @@ -1301,10 +1300,10 @@ def test_igather(self): self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: - self.assertTrue((output._DNDarray__array == torch.ones((ht.MPI_WORLD.size, 5,), dtype=torch.float32)).all()) + self.assertTrue((output._DNDarray__array == torch.ones((ht.MPI_WORLD.size, 5,), dtype=torch.float64)).all()) # contiguous data, different gather axis - data = ht.ones((5, 2,), dtype=ht.float32) + data = ht.ones((5, 2,), dtype=ht.float64) output = ht.random.randn(5, 2 * ht.MPI_WORLD.size) # ensure prior invariants @@ -1318,11 +1317,11 @@ def test_igather(self): self.assertTrue(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: self.assertTrue( - (output._DNDarray__array == torch.ones((5, 2 * ht.MPI_WORLD.size,), dtype=torch.float32)).all() + (output._DNDarray__array == torch.ones((5, 2 * ht.MPI_WORLD.size,), dtype=torch.float64)).all() ) # non-contiguous data - data = ht.ones((3, 5,), dtype=ht.float32).T + data = ht.ones((3, 5,), dtype=ht.float64).T output = ht.random.randn(5, 3 * ht.MPI_WORLD.size) # ensure prior invariants @@ -1336,11 +1335,11 @@ def test_igather(self): self.assertTrue(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: self.assertTrue( - (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float32)).all() + (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float64)).all() ) # non-contiguous output, different gather axis - data = ht.ones((5, 3,), dtype=ht.float32) + data = ht.ones((5, 3,), dtype=ht.float64) output = ht.random.randn(3 * ht.MPI_WORLD.size, 5).T # ensure prior invariants @@ -1354,7 +1353,7 @@ def test_igather(self): self.assertFalse(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: self.assertTrue( - (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float32)).all() + (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float64)).all() ) # MPI 
implementation may not support asynchronous operations diff --git a/heat/core/tests/test_manipulations.py b/heat/core/tests/test_manipulations.py index d0ecb6e2f8..bfe23bf7b3 100644 --- a/heat/core/tests/test_manipulations.py +++ b/heat/core/tests/test_manipulations.py @@ -526,8 +526,8 @@ def test_squeeze(self): # 4D local tensor, no axis result = ht.squeeze(data) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (4, 5)) self.assertEqual(result.lshape, (4, 5)) self.assertEqual(result.split, None) @@ -536,8 +536,8 @@ def test_squeeze(self): # 4D local tensor, major axis result = ht.squeeze(data, axis=0) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (4, 5, 1)) self.assertEqual(result.lshape, (4, 5, 1)) self.assertEqual(result.split, None) @@ -546,8 +546,8 @@ def test_squeeze(self): # 4D local tensor, minor axis result = ht.squeeze(data, axis=-1) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (1, 4, 5)) self.assertEqual(result.lshape, (1, 4, 5)) self.assertEqual(result.split, None) @@ -556,8 +556,8 @@ def test_squeeze(self): # 4D local tensor, tuple axis result = data.squeeze(axis=(0, -1)) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (4, 5)) self.assertEqual(result.lshape, (4, 5)) self.assertEqual(result.split, None) diff --git a/heat/core/tests/test_statistics.py b/heat/core/tests/test_statistics.py index d08a418c15..bcf29d2726 100644 --- a/heat/core/tests/test_statistics.py +++ b/heat/core/tests/test_statistics.py @@ -248,8 +248,8 @@ def test_average(self): self.assertIsInstance(avg_volume, ht.DNDarray) self.assertEqual(avg_volume.shape, (3,)) self.assertEqual(avg_volume.lshape[0], random_volume.lshape[0]) - self.assertEqual(avg_volume.dtype, ht.float32) - self.assertEqual(avg_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(avg_volume.dtype, ht.float64) + self.assertEqual(avg_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(avg_volume.split, 0) # check weighted average over all float elements of split 5d tensor, along split axis @@ -261,8 +261,8 @@ def test_average(self): self.assertIsInstance(avg_5d, ht.DNDarray) self.assertEqual(avg_5d.gshape, (size, 3, 4, 5)) self.assertLessEqual(avg_5d.lshape[1], 3) - self.assertEqual(avg_5d.dtype, ht.float32) - self.assertEqual(avg_5d._DNDarray__array.dtype, torch.float32) + self.assertEqual(avg_5d.dtype, ht.float64) + self.assertEqual(avg_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(avg_5d.split, 0) # check exceptions @@ -344,8 +344,8 @@ def test_max(self): self.assertIsInstance(maximum_volume, ht.DNDarray) 
self.assertEqual(maximum_volume.shape, (3, 3)) self.assertEqual(maximum_volume.lshape, (3, 3)) - self.assertEqual(maximum_volume.dtype, ht.float32) - self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume.dtype, ht.float64) + self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, None) # check max over all float elements of split 3d tensor, tuple axis @@ -356,8 +356,8 @@ def test_max(self): self.assertIsInstance(maximum_volume, ht.DNDarray) self.assertEqual(maximum_volume.shape, (3,)) self.assertEqual(maximum_volume.lshape, (3,)) - self.assertEqual(maximum_volume.dtype, ht.float32) - self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume.dtype, ht.float64) + self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, 0) self.assertTrue((maximum_volume == alt_maximum_volume).all()) @@ -368,8 +368,8 @@ def test_max(self): self.assertIsInstance(maximum_5d, ht.DNDarray) self.assertEqual(maximum_5d.shape, (1, 3, 4, 5)) self.assertLessEqual(maximum_5d.lshape[1], 3) - self.assertEqual(maximum_5d.dtype, ht.float32) - self.assertEqual(maximum_5d._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_5d.dtype, ht.float64) + self.assertEqual(maximum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_5d.split, 0) # check exceptions @@ -421,8 +421,8 @@ def test_maximum(self): self.assertIsInstance(maximum_volume, ht.DNDarray) self.assertEqual(maximum_volume.shape, (size * 12, 3, 3)) self.assertEqual(maximum_volume.lshape, (size * 12, 3, 3)) - self.assertEqual(maximum_volume.dtype, ht.float32) - self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume.dtype, ht.float64) + self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, random_volume_1.split) # check maximum over float elements of split 3d tensors with different split axis @@ -433,8 +433,8 @@ def test_maximum(self): self.assertIsInstance(maximum_volume_splitdiff, ht.DNDarray) self.assertEqual(maximum_volume_splitdiff.shape, (size*3, size*3, 4)) self.assertEqual(maximum_volume_splitdiff.lshape, (size*3, size*3, 4)) - self.assertEqual(maximum_volume_splitdiff.dtype, ht.float32) - self.assertEqual(maximum_volume_splitdiff._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume_splitdiff.dtype, ht.float64) + self.assertEqual(maximum_volume_splitdiff._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume_splitdiff.split, 0) random_volume_1_splitdiff = ht.array(ht.random.randn(size*3, size*3, 4), split=1) @@ -459,8 +459,8 @@ def test_maximum(self): self.assertIsInstance(output, ht.DNDarray) self.assertEqual(output.shape, (ht.MPI_WORLD.size * 12, 3, 3)) self.assertEqual(output.lshape, (ht.MPI_WORLD.size * 12, 3, 3)) - self.assertEqual(output.dtype, ht.float32) - self.assertEqual(output._DNDarray__array.dtype, torch.float32) + self.assertEqual(output.dtype, ht.float64) + self.assertEqual(output._DNDarray__array.dtype, torch.float64) self.assertEqual(output.split, random_volume_1.split) # check exceptions @@ -593,8 +593,8 @@ def test_min(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (3, 3)) self.assertEqual(minimum_volume.lshape, (3, 3)) - self.assertEqual(minimum_volume.dtype, ht.float32) - 
self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume.dtype, ht.float64) + self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, None) # check min over all float elements of split 3d tensor, tuple axis @@ -605,8 +605,8 @@ def test_min(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (3,)) self.assertEqual(minimum_volume.lshape, (3,)) - self.assertEqual(minimum_volume.dtype, ht.float32) - self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume.dtype, ht.float64) + self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, 0) self.assertTrue((minimum_volume == alt_minimum_volume).all()) @@ -617,8 +617,8 @@ def test_min(self): self.assertIsInstance(minimum_5d, ht.DNDarray) self.assertEqual(minimum_5d.shape, (1, 3, 4, 5)) self.assertLessEqual(minimum_5d.lshape[1], 3) - self.assertEqual(minimum_5d.dtype, ht.float32) - self.assertEqual(minimum_5d._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_5d.dtype, ht.float64) + self.assertEqual(minimum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_5d.split, 0) # check exceptions @@ -670,8 +670,8 @@ def test_minimum(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (size * 12, 3, 3)) self.assertEqual(minimum_volume.lshape, (size * 12, 3, 3)) - self.assertEqual(minimum_volume.dtype, ht.float32) - self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume.dtype, ht.float64) + self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, random_volume_1.split) # check minimum over float elements of split 3d tensors with different split axis @@ -682,8 +682,8 @@ def test_minimum(self): self.assertIsInstance(minimum_volume_splitdiff, ht.DNDarray) self.assertEqual(minimum_volume_splitdiff.shape, (size*3, size*3, 4)) self.assertEqual(minimum_volume_splitdiff.lshape, (size*3, size*3, 4)) - self.assertEqual(minimum_volume_splitdiff.dtype, ht.float32) - self.assertEqual(minimum_volume_splitdiff._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume_splitdiff.dtype, ht.float64) + self.assertEqual(minimum_volume_splitdiff._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume_splitdiff.split, 0) random_volume_1_splitdiff = ht.array(ht.random.randn(size*3, size*3, 4), split=1) @@ -708,8 +708,8 @@ def test_minimum(self): self.assertIsInstance(output, ht.DNDarray) self.assertEqual(output.shape, (ht.MPI_WORLD.size * 12, 3, 3)) self.assertEqual(output.lshape, (ht.MPI_WORLD.size * 12, 3, 3)) - self.assertEqual(output.dtype, ht.float32) - self.assertEqual(output._DNDarray__array.dtype, torch.float32) + self.assertEqual(output.dtype, ht.float64) + self.assertEqual(output._DNDarray__array.dtype, torch.float64) self.assertEqual(output.split, random_volume_1.split) # check exceptions From 526c029a5ab5156b055710464433cac4431b63b3 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 29 Aug 2019 15:18:56 +0200 Subject: [PATCH 08/24] fixed a bug in random --- heat/core/operations.py | 18 +++++++++++++----- heat/core/random.py | 2 +- heat/core/statistics.py | 1 - 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/heat/core/operations.py index 81fd9cfb59..4e2e177e67 100644 ---
a/heat/core/operations.py +++ b/heat/core/operations.py @@ -216,17 +216,25 @@ def __reduce_op(x, partial_op, reduction_op, **kwargs): partial = x._DNDarray__array output_shape = x.gshape for dim in axis: - partial = partial_op(partial, dim=dim, keepdim=True) - output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] + if 0 not in partial.shape: + partial = partial_op(partial, dim=dim, keepdim=True) + output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] + else: + output_shape = output_shape[:dim] + (0, ) + output_shape[dim + 1:] + print('output', output_shape) if not keepdim and not len(partial.shape) == 1: gshape_losedim = tuple(x.gshape[dim] for dim in range(len(x.gshape)) if dim not in axis) lshape_losedim = tuple(x.lshape[dim] for dim in range(len(x.lshape)) if dim not in axis) + if 0 in partial.shape: + lshape_losedim = (0, ) output_shape = gshape_losedim + print('output_shape', lshape_losedim) # Take care of special cases argmin and argmax: keep partial.shape[0] - if (0 in axis and partial.shape[0] != 1): + if 0 in axis and partial.shape[0] != 1: lshape_losedim = (partial.shape[0],) + lshape_losedim - if (not 0 in axis and partial.shape[0] != x.lshape[0]): + if 0 not in axis and partial.shape[0] != x.lshape[0]: lshape_losedim = (partial.shape[0],) + lshape_losedim[1:] + print('output_shape', lshape_losedim) partial = partial.reshape(lshape_losedim) # Check shape of output buffer, if any @@ -241,7 +249,7 @@ def __reduce_op(x, partial_op, reduction_op, **kwargs): # if reduction_op is a Boolean operation, then resulting tensor is bool boolean_ops = [MPI.LAND, MPI.LOR, MPI.BAND, MPI.BOR] - tensor_type = bool if reduction_op in boolean_ops else partial[0].dtype + tensor_type = bool if reduction_op in boolean_ops else partial.dtype if out is not None: out._DNDarray__array = partial diff --git a/heat/core/random.py b/heat/core/random.py index 9dbab52e67..0e98d00258 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -71,7 +71,7 @@ def __counter_sequence(shape, dtype, split, device, comm): raise ValueError('Shape is to big with {} elements'.format(total_elements)) if split is None: - values = int(total_elements / 2) + values = np.ceil(total_elements / 2) even_end = total_elements % 2 == 0 lslice = slice(None) if even_end else slice(None, -1) start = c_1 diff --git a/heat/core/statistics.py b/heat/core/statistics.py index ae0d70b2a3..ab1b73b6e9 100644 --- a/heat/core/statistics.py +++ b/heat/core/statistics.py @@ -390,7 +390,6 @@ def max(x, axis=None, out=None, keepdim=None): [12.]]) """ def local_max(*args, **kwargs): - print('args', *args) result = torch.max(*args, **kwargs) if isinstance(result, tuple): return result[0] From f53311d214b72a4445617148ea005395d65ba912 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 Sep 2019 14:02:43 +0200 Subject: [PATCH 09/24] fixed the reduce function max and min --- heat/core/operations.py | 12 +----- heat/core/statistics.py | 64 ++++++++++++++++++++++++++++-- heat/core/tests/test_statistics.py | 28 +++++++++---- 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/heat/core/operations.py b/heat/core/operations.py index 4e2e177e67..d57c62b83e 100644 --- a/heat/core/operations.py +++ b/heat/core/operations.py @@ -216,25 +216,17 @@ def __reduce_op(x, partial_op, reduction_op, **kwargs): partial = x._DNDarray__array output_shape = x.gshape for dim in axis: - if 0 not in partial.shape: - partial = partial_op(partial, dim=dim, keepdim=True) - output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] - else: - 
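# A quick check on the int() -> np.ceil() change in the random.py hunk of
# patch 08 above: every Threefry call yields a pair of values, so an odd total
# needs one extra call; plain integer division comes up one pair short. This
# assumes nothing beyond the arithmetic itself:
import math
total = 7
assert int(total / 2) * 2 < total         # 3 calls -> 6 values, one missing
assert math.ceil(total / 2) * 2 >= total  # 4 calls -> 8 values, trim the spare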
output_shape = output_shape[:dim] + (0, ) + output_shape[dim + 1:] - print('output', output_shape) + partial = partial_op(partial, dim=dim, keepdim=True) + output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] if not keepdim and not len(partial.shape) == 1: gshape_losedim = tuple(x.gshape[dim] for dim in range(len(x.gshape)) if dim not in axis) lshape_losedim = tuple(x.lshape[dim] for dim in range(len(x.lshape)) if dim not in axis) - if 0 in partial.shape: - lshape_losedim = (0, ) output_shape = gshape_losedim - print('output_shape', lshape_losedim) # Take care of special cases argmin and argmax: keep partial.shape[0] if 0 in axis and partial.shape[0] != 1: lshape_losedim = (partial.shape[0],) + lshape_losedim if 0 not in axis and partial.shape[0] != x.lshape[0]: lshape_losedim = (partial.shape[0],) + lshape_losedim[1:] - print('output_shape', lshape_losedim) partial = partial.reshape(lshape_losedim) # Check shape of output buffer, if any diff --git a/heat/core/statistics.py b/heat/core/statistics.py index ab1b73b6e9..19287d7bfa 100644 --- a/heat/core/statistics.py +++ b/heat/core/statistics.py @@ -390,9 +390,37 @@ def max(x, axis=None, out=None, keepdim=None): [12.]]) """ def local_max(*args, **kwargs): - result = torch.max(*args, **kwargs) + array = args[0] + dim = kwargs.get('dim') + if 0 in array.shape: + # Empty local vector would throw an error in the torch max function + if dim == x.split or (dim is None and x.split == 0): + # No distributed result + out_shape = list(array.shape) + empty_dim = next(i for i, d in enumerate(array.shape) if d == 0) + out_shape[empty_dim] = 1 + + # Lowest possible value should be neutral to the max function + if array.dtype is torch.int8: + fill_value = -(1 << 7) + elif array.dtype is torch.int16: + fill_value = -(1 << 15) + elif array.dtype is torch.int32: + fill_value = -(1 << 31) + elif array.dtype is torch.int64: + fill_value = -(1 << 63) + else: + fill_value = float('-inf') + + # Create a local result with a "neutral" value that should not affect the global result + result = torch.empty(out_shape, dtype=array.dtype).fill_(fill_value) + else: + # Distributed result: return an empty tensor as the local result + result = torch.empty_like(array) + else: + result = torch.max(*args, **kwargs) if isinstance(result, tuple): - return result[0] + result = result[0] return result return operations.__reduce_op(x, local_max, MPI.MAX, axis=axis, out=out, keepdim=keepdim) @@ -806,9 +834,37 @@ def min(x, axis=None, out=None, keepdim=None): """ def local_min(*args, **kwargs): - result = torch.min(*args, **kwargs) + array = args[0] + dim = kwargs.get('dim') + if 0 in array.shape: + # Empty local vector would throw an error in the torch min function + if dim == x.split or (dim is None and x.split == 0): + # No distributed result + out_shape = list(array.shape) + empty_dim = next(i for i, d in enumerate(array.shape) if d == 0) + out_shape[empty_dim] = 1 + + # Highest possible value should be neutral to the min function + if array.dtype is torch.int8: + fill_value = (1 << 7) - 1 + elif array.dtype is torch.int16: + fill_value = (1 << 15) - 1 + elif array.dtype is torch.int32: + fill_value = (1 << 31) - 1 + elif array.dtype is torch.int64: + fill_value = (1 << 63) - 1 + else: + fill_value = float('inf') + + # Create a local result with a "neutral" value that should not affect the global result + result = torch.empty(out_shape, dtype=array.dtype).fill_(fill_value) + else: + # Distributed result: return an empty tensor as the local result + result = 
torch.empty_like(array) + else: + result = torch.min(*args, **kwargs) if isinstance(result, tuple): - return result[0] + result = result[0] return result return operations.__reduce_op(x, local_min, MPI.MIN, axis=axis, out=out, keepdim=keepdim) diff --git a/heat/core/tests/test_statistics.py b/heat/core/tests/test_statistics.py index bcf29d2726..6bace2ad7d 100644 --- a/heat/core/tests/test_statistics.py +++ b/heat/core/tests/test_statistics.py @@ -43,16 +43,16 @@ def test_argmax(self): self.assertTrue((result._DNDarray__array == torch.tensor([19]))) # 2D split tensor, along the axis - torch.manual_seed(1) data = ht.array(ht.random.randn(4, 5), is_split=0) result = ht.argmax(data, axis=1) + expected = torch.argmax(data._DNDarray__array, dim=1) self.assertIsInstance(result, ht.DNDarray) self.assertEqual(result.dtype, ht.int64) self.assertEqual(result._DNDarray__array.dtype, torch.int64) self.assertEqual(result.shape, (ht.MPI_WORLD.size * 4,)) self.assertEqual(result.lshape, (4,)) self.assertEqual(result.split, 0) - self.assertTrue((result._DNDarray__array == torch.tensor([4, 4, 2, 4])).all()) + self.assertTrue((result._DNDarray__array == expected).all()) # 2D split tensor, across the axis size = ht.MPI_WORLD.size * 2 @@ -127,16 +127,16 @@ def test_argmin(self): self.assertTrue((result._DNDarray__array == data._DNDarray__array.argmin(-1, keepdim=True)).all()) # 2D split tensor, along the axis - torch.manual_seed(1) data = ht.array(ht.random.randn(4, 5), is_split=0) result = ht.argmin(data, axis=1) + expected = torch.argmin(data._DNDarray__array, dim=1) self.assertIsInstance(result, ht.DNDarray) self.assertEqual(result.dtype, ht.int64) self.assertEqual(result._DNDarray__array.dtype, torch.int64) self.assertEqual(result.shape, (ht.MPI_WORLD.size * 4,)) self.assertEqual(result.lshape, (4,)) self.assertEqual(result.split, 0) - self.assertTrue((result._DNDarray__array == torch.tensor([3, 1, 1, 3])).all()) + self.assertTrue((result._DNDarray__array == expected).all()) # 2D split tensor, across the axis size = ht.MPI_WORLD.size * 2 @@ -290,8 +290,6 @@ def test_average(self): with self.assertRaises(ValueError): ht.average(ht_array, axis=-4) - - def test_max(self): data = [ [1, 2, 3], @@ -355,7 +353,6 @@ def test_max(self): self.assertIsInstance(maximum_volume, ht.DNDarray) self.assertEqual(maximum_volume.shape, (3,)) - self.assertEqual(maximum_volume.lshape, (3,)) self.assertEqual(maximum_volume.dtype, ht.float64) self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, 0) @@ -372,6 +369,14 @@ def test_max(self): self.assertEqual(maximum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_5d.split, 0) + # Calculating max with empty local vectors works + size = ht.MPI_WORLD.size + if size > 1: + a = ht.arange(size - 1, split=0) + res = ht.max(a) + expected = torch.tensor([size - 2], dtype=a.dtype.torch_type()) + self.assertTrue(torch.equal(res._DNDarray__array, expected)) + # check exceptions with self.assertRaises(TypeError): ht_array.max(axis=1.1) @@ -604,7 +609,6 @@ def test_min(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (3,)) - self.assertEqual(minimum_volume.lshape, (3,)) self.assertEqual(minimum_volume.dtype, ht.float64) self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, 0) @@ -621,6 +625,14 @@ def test_min(self): self.assertEqual(minimum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_5d.split, 0) + 
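# A standalone sketch of the fill-value idea in local_min/local_max above:
# ranks whose shard is empty contribute the reduction's identity element, so
# torch never reduces an empty tensor and MPI.MIN / MPI.MAX simply ignore the
# placeholder. Hypothetical helper, not part of the patch:
import torch

def min_identity(dtype):
    # the largest representable value loses against any real element under MIN
    return float('inf') if dtype.is_floating_point else torch.iinfo(dtype).max

empty = torch.empty(0, dtype=torch.int32)
assert min(min_identity(empty.dtype), 5) == 5   # neutral w.r.t. real data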
# Calculating min with empty local vectors works + size = ht.MPI_WORLD.size + if size > 1: + a = ht.arange(size - 1, split=0) + res = ht.min(a) + expected = torch.tensor([0], dtype=a.dtype.torch_type()) + self.assertTrue(torch.equal(res._DNDarray__array, expected)) + # check exceptions with self.assertRaises(TypeError): ht_array.min(axis=1.1) From 3d086489362c1c42dc04e733c61352a03e9d1f18 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 Sep 2019 14:33:31 +0200 Subject: [PATCH 10/24] fixed the kmeans setup to fit the new random module --- heat/ml/cluster/kmeans.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/heat/ml/cluster/kmeans.py b/heat/ml/cluster/kmeans.py index 4f22518d5b..7befa802d2 100644 --- a/heat/ml/cluster/kmeans.py +++ b/heat/ml/cluster/kmeans.py @@ -17,8 +17,11 @@ def initialize_centroids(k, dimensions, seed, device): # TODO: document me # TODO: extend me with further initialization methods # zero-centered uniform random distribution in [-1, 1] - ht.random.set_gseed(seed) - return ht.random.uniform(low=-1.0, high=1.0, size=(1, dimensions, k), device=device) + ht.random.seed(seed) + rands = ht.random.rand((1, dimensions, k), device=device) + # change the range of the values from [0, 1) to [-1, 1) + rands = rands * 2 - 1 + return rands def fit(self, data): # TODO: document me From d33f743bb1981a11c8b7b0693b8b58d8d597b971 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 Sep 2019 14:37:16 +0200 Subject: [PATCH 11/24] unit tests now running in kmeans --- heat/core/manipulations.py | 2 -- heat/core/random.py | 2 -- heat/ml/cluster/kmeans.py | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/heat/core/manipulations.py b/heat/core/manipulations.py index 2830695f7e..029b8527d7 100644 --- a/heat/core/manipulations.py +++ b/heat/core/manipulations.py @@ -568,8 +568,6 @@ def sort(a, axis=None, descending=False, out=None): second_result[idx_slice] = r_val second_indices[idx_slice] = r_ind - # print('second_result', second_result, 'tmp_indices', second_indices) - second_result, tmp_indices = second_result.sort(dim=0, descending=descending) final_result = second_result.transpose(0, axis) final_indices = torch.empty_like(second_indices) diff --git a/heat/core/random.py b/heat/core/random.py index 0e98d00258..9a768ccddb 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -259,7 +259,6 @@ def rand(*args, split=None, device=None, comm=None): out : ndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. 
""" - print('args', args) # if args are not set, generate a single sample if not args: args = (1,) @@ -396,7 +395,6 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ - print('args', args) # generate uniformly distributed random numbers first normal_tensor = rand(*args, split=split, device=device, comm=comm) # convert the the values to a normal distribution using the kundu transform diff --git a/heat/ml/cluster/kmeans.py b/heat/ml/cluster/kmeans.py index 7befa802d2..a97982cb0a 100644 --- a/heat/ml/cluster/kmeans.py +++ b/heat/ml/cluster/kmeans.py @@ -18,7 +18,7 @@ def initialize_centroids(k, dimensions, seed, device): # TODO: extend me with further initialization methods # zero-centered uniform random distribution in [-1, 1] ht.random.seed(seed) - rands = ht.random.rand((1, dimensions, k), device=device) + rands = ht.random.rand(1, dimensions, k, device=device) # change the range of the values from [0, 1) to [-1, 1) rands = rands * 2 - 1 return rands From 2d511a5546deaeff5d767131e45623085c1c143c Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 07:14:44 +0200 Subject: [PATCH 12/24] reduced the number of iterations for the threefry algorithm --- heat/core/random.py | 18 +++++++++--------- heat/core/tests/test_random.py | 20 ++++++++++++++++++-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 9a768ccddb..5dc98f39c1 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -564,15 +564,15 @@ def __threefry64(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 - X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 - - # inject key - X_0 += ks_2; X_1 += (ks_0 + 2) - - X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9 - X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 10 - X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 11 - X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 12 + # X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 + # + # # inject key + # X_0 += ks_2; X_1 += (ks_0 + 2) + # + # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9 + # X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 10 + # X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 11 + # X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 12 # inject key X_0 += ks_0; X_1 += (ks_1 + 3) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 68ebbfe593..1932067f98 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,7 +1,9 @@ +import time import unittest import heat as ht import numpy as np +import matplotlib.pyplot as plt class TestTensor(unittest.TestCase): @@ -82,7 +84,21 @@ def test_rand(self): self.assertTrue((counts == 1).all()) def test_randint(self): - pass + a = ht.random.rand(1000, 1000) + b = a.numpy() + plt.imshow(b) + plt.gray() + plt.show() def test_randn(self): - pass + t1 = time.time() + a = ht.random.rand(1000, 1000, split=1) + t2 = time.time() + print('time taken', t2-t1) + self.fail() + + def test_read(self): + a = np.load('../../../all_rounds.npy') + plt.imshow(a) + plt.gray() + plt.show() \ No newline at end of file From e9d2f9b5eacaf69f4c7f58d88b13c03667861b1a Mon Sep 17 00:00:00 2001 From: 
simon Date: Fri, 6 Sep 2019 11:18:41 +0200 Subject: [PATCH 13/24] Fixed the randn and randint functions and added test cases for both of them --- heat/core/manipulation.py | 0 heat/core/random.py | 14 +++-- heat/core/tests/test_random.py | 109 ++++++++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 22 deletions(-) delete mode 100644 heat/core/manipulation.py diff --git a/heat/core/manipulation.py b/heat/core/manipulation.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/heat/core/random.py b/heat/core/random.py index 5dc98f39c1..87246a0b22 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -320,7 +320,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm low, high = int(low), int(high) if low >= high: raise ValueError('low >= high') - span = high - low + 1 + span = high - low # sanitize shape if size is None: @@ -333,7 +333,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm if dtype is None: dtype = types.int64 dtype = types.canonical_heat_type(dtype) - if dtype is not types.int64 and dtype is not types.int32: + if dtype not in [types.int64, types.int32]: raise ValueError('Unsupported dtype for randint') torch_dtype = dtype.torch_type() @@ -341,16 +341,15 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm split = stride_tricks.sanitize_axis(shape, split) device = devices.sanitize_device(device) comm = communication.sanitize_comm(comm) - # generate the random sequence - x_0, x_1, lshape = __counter_sequence(shape, torch_dtype, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, dtype.torch_type(), split, device, comm) if torch_dtype is torch.int32: x_0, x_1 = __threefry32(x_0, x_1) else: x_0, x_1 = __threefry64(x_0, x_1) # stack the resulting sequence and normalize to given range - values = torch.stack([x_0, x_1], dim=1).reshape(lshape) + values = torch.stack([x_0, x_1], dim=1).flatten()[lslice].reshape(lshape) # ATTENTION: this is biased and known, bias-free rejection sampling is difficult to do in parallel values = (values.abs_() % span) + low @@ -563,7 +562,10 @@ def __threefry64(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 - X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 + + # With half of the iterations the "randomness" is already achieved and computation time is halved + + # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 # X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 # # # inject key diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 1932067f98..f91c8c03f0 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -83,22 +83,99 @@ def test_rand(self): _, counts = np.unique(c, return_counts=True) self.assertTrue((counts == 1).all()) + # Values should be spread evenly across the range [0, 1) + mean = np.mean(c) + median = np.median(c) + std = np.std(c) + self.assertTrue(0.49 < mean < 0.51) + self.assertTrue(0.49 < median < 0.51) + self.assertTrue(std < 0.3) + self.assertTrue(((0 <= c) & (c < 1)).all()) + def test_randint(self): - a = ht.random.rand(1000, 1000) - b = a.numpy() - plt.imshow(b) - plt.gray() - plt.show() + # Checked that the random values are in the correct range + a = ht.random.randint(low=0, high=10, size=(10, 10)) + a = a.numpy() + self.assertTrue(((0 <= a) & (a < 10)).all()) + + a = 
ht.random.randint(low=100000, high=150000, size=(31, 25, 11), split=2) + a = a.numpy() + self.assertTrue(((100000 <= a) & (a < 150000)).all()) + + # For the range [0, 1) only the value 0 is allowed + a = ht.random.randint(1, size=(10, ), split=0) + b = ht.zeros((10, ), dtype=ht.int64, split=0) + self.assertTrue(ht.equal(a, b)) + + # Two arrays with the same seed and same number of elements have the same random values + ht.random.seed(13579) + shape = (15, 13, 9, 21, 65) + a = ht.random.randint(15, 100, size=shape, split=0) + a = a.numpy().flatten() + + ht.random.seed(13579) + elements = np.prod(shape) + b = ht.random.randint(low=15, high=100, size=(elements, )) + b = b.numpy() + self.assertTrue(np.array_equal(a, b)) + + # Two arrays with the same seed and shape have identical values + ht.random.seed(13579) + a = ht.random.randint(10000, size=shape, split=2) + a = a.numpy() + + ht.random.seed(13579) + b = ht.random.randint(low=0, high=10000, size=shape, split=2) + b = b.numpy() + + self.assertTrue(np.array_equal(a, b)) + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + + # Mean and median should be in the center while the std is very high due to an even distribution + self.assertTrue(4900 < mean < 5100) + self.assertTrue(4900 < median < 5100) + self.assertTrue(std < 2900) def test_randn(self): - t1 = time.time() - a = ht.random.rand(1000, 1000, split=1) - t2 = time.time() - print('time taken', t2-t1) - self.fail() - - def test_read(self): - a = np.load('../../../all_rounds.npy') - plt.imshow(a) - plt.gray() - plt.show() \ No newline at end of file + # Test that the random values have the correct distribution + ht.random.seed(54321) + shape = (5, 10, 13, 23, 15, 20) + a = ht.random.randn(*shape, split=0) + a = a.numpy() + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + self.assertTrue(-0.01 < mean < 0.01) + self.assertTrue(-0.01 < median < 0.01) + self.assertTrue(0.99 < std < 1.01) + + # Compare to a second array with a different shape but same number of elements and same seed + ht.random.seed(54321) + elements = np.prod(shape) + b = ht.random.randn(elements, split=0) + b = b.numpy() + a = a.flatten() + self.assertTrue(np.array_equal(a, b)) + + # Creating the same array two times without resetting seed results in different elements + c = ht.random.randn(elements, split=0) + c = c.numpy() + self.assertEqual(c.shape, b.shape) + self.assertFalse(np.array_equal(b, c)) + + # All the created values should be different + d = np.concatenate((b, c)) + _, counts = np.unique(d, return_counts=True) + self.assertTrue((counts == 1).all()) + + # Two arrays are the same for same seed and split-axis != 0 + ht.random.seed(12345) + a = ht.random.randn(*shape, split=5) + ht.random.seed(12345) + b = ht.random.randn(*shape, split=5) + self.assertTrue(ht.equal(a, b)) + a = a.numpy() + b = b.numpy() + self.assertTrue(np.array_equal(a, b)) From 2d54f2508a8f1019245efde601d1f6f1d870dfb1 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 12:31:55 +0200 Subject: [PATCH 14/24] removed unnecessary imports --- heat/core/tests/test_random.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index f91c8c03f0..7f99d47ef7 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,9 +1,7 @@ -import time import unittest import heat as ht import numpy as np -import matplotlib.pyplot as plt class TestTensor(unittest.TestCase): From 0d6d0e6c80cd3cb298e9c9c316dc1c5cee8310af Mon Sep 17 00:00:00 2001 From: 
simon Date: Fri, 6 Sep 2019 15:14:32 +0200 Subject: [PATCH 15/24] added more negative test cases --- heat/core/tests/test_random.py | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 7f99d47ef7..aa00d0beb5 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -90,6 +90,19 @@ def test_rand(self): self.assertTrue(std < 0.3) self.assertTrue(((0 <= c) & (c < 1)).all()) + # No arguments work correctly + ht.random.seed(seed) + a = ht.random.rand() + ht.random.seed(seed) + b = ht.random.rand(1) + self.assertTrue(ht.equal(a, b)) + + # To big arrays cant be created + with self.assertRaises(ValueError): + ht.random.randn(0xffffffffffffffff * 2 + 1, comm=ht.MPI_WORLD) + with self.assertRaises(ValueError): + ht.random.rand(3, 2, -2, 5, split=1, comm=ht.MPI_WORLD) + def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) @@ -126,6 +139,10 @@ def test_randint(self): b = ht.random.randint(low=0, high=10000, size=shape, split=2) b = b.numpy() + ht.random.seed(13579) + c = ht.random.randint(low=0, high=10000) + self.assertTrue(np.equal(b[0, 0, 0, 0, 0], c)) + self.assertTrue(np.array_equal(a, b)) mean = np.mean(a) median = np.median(a) @@ -136,6 +153,13 @@ def test_randint(self): self.assertTrue(4900 < median < 5100) self.assertTrue(std < 2900) + with self.assertRaises(ValueError): + ht.random.randint(5, 5, size=(10, 10), split=0) + with self.assertRaises(ValueError): + ht.random.randint(low=0, high=10, size=(3, -4)) + with self.assertRaises(ValueError): + ht.random.randint(low=0, high=10, size=(15, ), dtype=ht.float32) + def test_randn(self): # Test that the random values have the correct distribution ht.random.seed(54321) @@ -177,3 +201,16 @@ def test_randn(self): a = a.numpy() b = b.numpy() self.assertTrue(np.array_equal(a, b)) + + def test_set_state(self): + ht.random.set_state(('Threefry', 12345, 0xfff)) + self.assertEqual(ht.random.get_state(), ('Threefry', 12345, 0xfff, 0, 0.0)) + + ht.random.set_state(('Threefry', 55555, 0xffffffffffffff, 'for', 'compatibility')) + self.assertEqual(ht.random.get_state(), ('Threefry', 55555, 0xffffffffffffff, 0, 0.0)) + + with self.assertRaises(ValueError): + ht.random.set_state(('Thrfry', 12, 0xf)) + with self.assertRaises(ValueError): + ht.random.set_state(('Threefry', 12345)) + From 8b46d135f9739ca83a1643f6b1ef1bc2b192c9ed Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 15:15:16 +0200 Subject: [PATCH 16/24] fixed a bug --- heat/core/tests/test_random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index aa00d0beb5..1b2e361c05 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -211,6 +211,6 @@ def test_set_state(self): with self.assertRaises(ValueError): ht.random.set_state(('Thrfry', 12, 0xf)) - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): ht.random.set_state(('Threefry', 12345)) From 2ad7aa937ead63af0ab5ddca8eb8be16ae95146d Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 15:18:12 +0200 Subject: [PATCH 17/24] renewed the function description --- heat/core/random.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/heat/core/random.py b/heat/core/random.py index 87246a0b22..c948bb126d 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -67,6 +67,7 @@ def 
__counter_sequence(shape, dtype, split, device, comm): c_1 = __counter & max_count total_elements = np.prod(shape) + print('total', hex(total_elements), 'max', hex(2*max_count)) if total_elements > 2 * max_count: raise ValueError('Shape is to big with {} elements'.format(total_elements)) @@ -287,7 +288,8 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm """ Random values in a given shape. - Create a tensor of the given shape and populate it with random samples from a uniform distribution over [0, 1). + Create a tensor of the given shape and populate it with random integer samples from a uniform distribution over + [low, high) or [0, low) if high is not provided. Parameters ---------- From 1471d901d0bab5d1082baa0205f6cd084684111f Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 13:23:26 +0200 Subject: [PATCH 18/24] implemented rand for float32 --- heat/core/random.py | 34 ++++++++++++++++--------- heat/core/tests/test_random.py | 45 +++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 12 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index c948bb126d..1c9f49310c 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -67,7 +67,6 @@ def __counter_sequence(shape, dtype, split, device, comm): c_1 = __counter & max_count total_elements = np.prod(shape) - print('total', hex(total_elements), 'max', hex(2*max_count)) if total_elements > 2 * max_count: raise ValueError('Shape is to big with {} elements'.format(total_elements)) @@ -152,8 +151,8 @@ def __counter_sequence(shape, dtype, split, device, comm): # Correctly increase the counter variable used_values = int(np.ceil(total_elements / 2)) # Increase counter but not over 128 bit - tmp_counter += used_values & 0xffffffffffffffffffffffffffffffff # 128bit mask - __counter = tmp_counter + tmp_counter += used_values + __counter = tmp_counter & 0xffffffffffffffffffffffffffffffff # 128bit mask return x_0, x_1, lshape, lslice @@ -237,7 +236,7 @@ def __kundu_transform(values): return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE -def rand(*args, split=None, device=None, comm=None): +def rand(*args, dtype=types.float64, split=None, device=None, comm=None): """ Random values in a given shape. 
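A note on the counter bookkeeping fixed in the hunk above: the generator state advances by one tick per emitted pair of values and wraps at 128 bits, so reaching the end of the counter space rolls the stream over instead of growing the state unboundedly. A minimal sketch of that arithmetic, with illustrative names rather than the module's own:

    import numpy as np

    def advance(counter, total_elements):
        used = int(np.ceil(total_elements / 2))     # one tick per output pair
        return (counter + used) & ((1 << 128) - 1)  # wrap at 128 bits

    assert advance((1 << 128) - 1, 2) == 0          # overflow rolls over to zero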
@@ -275,13 +274,23 @@ def rand(*args, split=None, device=None, comm=None): comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) - x_0, x_1 = __threefry64(x_0, x_1) + if dtype == types.float32: + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int32, split, device, comm) + x_0, x_1 = __threefry32(x_0, x_1) + + # combine the values into one tensor and convert them to floats + values = __int32_to_float32(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) + elif dtype == types.float64: + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1 = __threefry64(x_0, x_1) - # combine the values into one tensor and convert them to floats - values = __int64_to_float64(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) + # combine the values into one tensor and convert them to floats + values = __int64_to_float64(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) + else: + # Unsupported type + raise ValueError('dtype is none of ht.float32 or ht.float64 but was {}'.format(dtype)) - return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + return dndarray.DNDarray(values, shape, dtype, split, device, comm) def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm=None): @@ -479,9 +488,12 @@ def __threefry32(X_0, X_1): """ samples = len(X_0) + # Seed is > 32 bit + seed_32 = __seed & 0xffffffff + # set up key buffer - ks_0 = torch.full((samples,), __seed, dtype=torch.int32) - ks_1 = torch.full((samples,), __seed, dtype=torch.int32) + ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) + ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) ks_2 ^= ks_0 ks_2 ^= ks_0 diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 1b2e361c05..4f3c121944 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -4,7 +4,7 @@ import numpy as np -class TestTensor(unittest.TestCase): +class TestRandom(unittest.TestCase): def test_rand(self): # int64 tests @@ -103,6 +103,49 @@ def test_rand(self): with self.assertRaises(ValueError): ht.random.rand(3, 2, -2, 5, split=1, comm=ht.MPI_WORLD) + # 32 Bit tests + ht.random.seed(9876) + shape = (13, 43, 13, 23) + a = ht.random.rand(*shape, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) + self.assertEqual(a.dtype, ht.float32) + + ht.random.seed(9876) + b = ht.random.rand(np.prod(shape), dtype=ht.float32, comm=ht.MPI_WORLD) + a = a.numpy().flatten() + b = b._DNDarray__array.numpy() + self.assertTrue(np.array_equal(a, b)) + + a = ht.random.rand(21, 16, 17, 21, dtype=ht.float32, split=2, comm=ht.MPI_WORLD) + b = ht.random.rand(15, 11, 19, 31, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) + a = a.numpy().flatten() + b = b.numpy().flatten() + c = np.concatenate((a, b)) + + _, counts = np.unique(c, return_counts=True) + # Values somehow repeat quite often (bad key or shifts?) 
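# One concrete suspect for the repeats noted above: on signed integer tensors,
# '>>' is an arithmetic (sign-extending) shift, so (x << r) | (x >> (32 - r))
# is only a true rotation while x is non-negative. A reference rotation over
# unsigned 32-bit words, using Python's unbounded ints with explicit masking:
def rotl32(x, r):
    x &= 0xffffffff
    return ((x << r) | (x >> (32 - r))) & 0xffffffff

assert rotl32(0x80000001, 1) == 0x00000003  # the high bit wraps around cleanly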
+ # self.assertTrue((counts == 1).all()) # TODO fails + + # Values should be spread evenly across the range [0, 1) + mean = np.mean(c) + median = np.median(c) + std = np.std(c) + self.assertTrue(0.49 < mean < 0.51) + self.assertTrue(0.49 < median < 0.51) + self.assertTrue(std < 0.3) + self.assertTrue(((0 <= c) & (c < 1)).all()) + + ht.random.seed(11111) + a = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32, comm=ht.MPI_WORLD).numpy() + # Overflow reached + ht.random.set_state(('Threefry', 11111, 0x10000000000000000)) + b = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32, comm=ht.MPI_WORLD).numpy() + self.assertTrue(np.array_equal(a, b)) + + ht.random.set_state(('Threefry', 11111, 0x100000000)) + c = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32, comm=ht.MPI_WORLD).numpy() + self.assertFalse(np.array_equal(a, c)) + self.assertFalse(np.array_equal(b, c)) + def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) From 47a781e47a5e2b63cff77e34f4bf56dff857c468 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 13:42:51 +0200 Subject: [PATCH 19/24] added test cases for randint with int32 --- heat/core/random.py | 2 +- heat/core/tests/test_random.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/heat/core/random.py b/heat/core/random.py index 1c9f49310c..f6e941d80f 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -356,7 +356,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm x_0, x_1, lshape, lslice = __counter_sequence(shape, dtype.torch_type(), split, device, comm) if torch_dtype is torch.int32: x_0, x_1 = __threefry32(x_0, x_1) - else: + else: # torch.int64 x_0, x_1 = __threefry64(x_0, x_1) # stack the resulting sequence and normalize to given range diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 4f3c121944..d87e7715b2 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -203,6 +203,38 @@ def test_randint(self): with self.assertRaises(ValueError): ht.random.randint(low=0, high=10, size=(15, ), dtype=ht.float32) + # int32 tests + ht.random.seed(4545) + a = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) + ht.random.set_state(('Threefry', 4545, 0x10000000000000000)) + b = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) + + self.assertEqual(a.dtype, ht.int32) + self.assertEqual(b.dtype, ht.int32) + a = a.numpy() + b = b.numpy() + self.assertTrue(np.array_equal(a, b)) + self.assertTrue(((50 <= a) & (a < 1000)).all()) + self.assertTrue(((50 <= b) & (b < 1000)).all()) + + c = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) + c = c.numpy() + self.assertFalse(np.array_equal(a, c)) + self.assertFalse(np.array_equal(b, c)) + self.assertTrue(((50 <= c) & (c < 1000)).all()) + + ht.random.seed(0xfffffff) + a = ht.random.randint(10000, size=(123, 42, 13, 21), split=3, dtype=ht.int32, comm=ht.MPI_WORLD) + a = a.numpy() + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + + # Mean and median should be in the center while the std is very high due to an even distribution + self.assertTrue(4900 < mean < 5100) + self.assertTrue(4900 < median < 5100) + self.assertTrue(std < 2900) + def test_randn(self): # Test that the random values have the correct distribution ht.random.seed(54321) From 
4f14b43a289b060bb0b7791a349d53e6c6b03122 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 14:05:03 +0200 Subject: [PATCH 20/24] added tests for randn with float32 --- heat/core/random.py | 16 ++++++++++------ heat/core/tests/test_random.py | 35 +++++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index f6e941d80f..fa0eb4fbb3 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -247,6 +247,8 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): d0, d1, …, dn : int, optional The dimensions of the returned array, should all be positive. If no argument is given a single random samples is generated. + dtype: ht.types, optional + The datatype of the returned values. Has to be one of [ht.float32, ht.float64]. Default is ht.float64. split: int, optional The axis along which the array is split and distributed, defaults to None (no distribution). device : str or None, optional @@ -256,7 +258,7 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): Returns ------- - out : ndarray, shape (d0, d1, ..., dn) + out : ht.dndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. """ # if args are not set, generate a single sample @@ -321,7 +323,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm Returns ------- - out : ndarray, shape (d0, d1, ..., dn) + out : ht.dndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. """ # determine range bounds @@ -367,7 +369,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm return dndarray.DNDarray(values, shape, dtype, split, device, comm) -def randn(*args, split=None, device=None, comm=None): +def randn(*args, dtype=types.float64, split=None, device=None, comm=None): """ Returns a tensor filled with random numbers from a standard normal distribution with zero mean and variance of one. @@ -375,6 +377,8 @@ def randn(*args, split=None, device=None, comm=None): ---------- d0, d1, …, dn : int, optional The dimensions of the returned array, should be all positive. + dtype: ht.types, optional + The datatype of the returned values. Has to be one of [ht.float32, ht.float64]. Default is ht.float64. split: int, optional The axis along which the array is split and distributed, defaults to None (no distribution). device : str or None, optional @@ -384,8 +388,8 @@ def randn(*args, split=None, device=None, comm=None): Returns ------- - broadcast_shape : tuple of ints - the broadcast shape + out : ht.dndarray, shape (d0, d1, ..., dn) + The normal distributed random values. 
Raises ------- @@ -406,7 +410,7 @@ def randn(*args, split=None, device=None, comm=None): [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ # generate uniformly distributed random numbers first - normal_tensor = rand(*args, split=split, device=device, comm=comm) + normal_tensor = rand(*args, dtype=dtype, split=split, device=device, comm=comm) # convert the the values to a normal distribution using the kundu transform normal_tensor._DNDarray__array = __kundu_transform(normal_tensor._DNDarray__array) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index d87e7715b2..359b9d6241 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,5 +1,7 @@ import unittest +import torch + import heat as ht import numpy as np @@ -12,10 +14,12 @@ def test_rand(self): seed = 12345 ht.random.seed(seed) a = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + self.assertEqual(a.dtype, ht.float64) + self.assertEqual(a._DNDarray__array.dtype, torch.float64) b = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) self.assertFalse(ht.equal(a, b)) ht.random.seed(seed) - c = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + c = ht.random.rand(2, 5, 7, 3, dtype=ht.float64, split=0, comm=ht.MPI_WORLD) self.assertTrue(ht.equal(a, c)) # Random numbers with overflow @@ -25,6 +29,7 @@ def test_rand(self): b = ht.random.rand(2, 44, split=0, comm=ht.MPI_WORLD) a = a.numpy().flatten() b = b.numpy().flatten() + self.assertEqual(a.dtype, np.float64) self.assertTrue(np.array_equal(a[32:], b)) # Check that random numbers don't repeat after first overflow @@ -108,12 +113,14 @@ def test_rand(self): shape = (13, 43, 13, 23) a = ht.random.rand(*shape, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) self.assertEqual(a.dtype, ht.float32) + self.assertEqual(a._DNDarray__array.dtype, torch.float32) ht.random.seed(9876) b = ht.random.rand(np.prod(shape), dtype=ht.float32, comm=ht.MPI_WORLD) a = a.numpy().flatten() b = b._DNDarray__array.numpy() self.assertTrue(np.array_equal(a, b)) + self.assertEqual(a.dtype, np.float32) a = ht.random.rand(21, 16, 17, 21, dtype=ht.float32, split=2, comm=ht.MPI_WORLD) b = ht.random.rand(15, 11, 19, 31, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) @@ -149,6 +156,7 @@ def test_rand(self): def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) + self.assertEqual(a.dtype, ht.int64) a = a.numpy() self.assertTrue(((0 <= a) & (a < 10)).all()) @@ -210,9 +218,11 @@ def test_randint(self): b = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) self.assertEqual(a.dtype, ht.int32) + self.assertEqual(a._DNDarray__array.dtype, torch.int32) self.assertEqual(b.dtype, ht.int32) a = a.numpy() b = b.numpy() + self.assertEqual(a.dtype, np.int32) self.assertTrue(np.array_equal(a, b)) self.assertTrue(((50 <= a) & (a < 1000)).all()) self.assertTrue(((50 <= b) & (b < 1000)).all()) @@ -240,6 +250,7 @@ def test_randn(self): ht.random.seed(54321) shape = (5, 10, 13, 23, 15, 20) a = ht.random.randn(*shape, split=0) + self.assertEqual(a.dtype, ht.float64) a = a.numpy() mean = np.mean(a) median = np.median(a) @@ -277,6 +288,28 @@ def test_randn(self): b = b.numpy() self.assertTrue(np.array_equal(a, b)) + # Tests with float32 + ht.random.seed(54321) + a = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2, comm=ht.MPI_WORLD) + self.assertEqual(a.dtype, ht.float32) + self.assertEqual(a._DNDarray__array[0, 0, 0].dtype, torch.float32) + a = a.numpy() + 
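# Why the mean/std assertions in these tests are plausible: randn pushes the
# uniform draws through the Kundu approximation of the normal quantile quoted
# earlier, z = (log(-log(1 - u**0.0775)) - 1.0821) * K. A quick empirical
# check, with K = 1 / 0.3807 assumed here (the scale constant itself is not
# shown in this excerpt):
import numpy as np
u = np.random.default_rng(0).random(100_000)
z = (np.log(-np.log(1 - u ** 0.0775)) - 1.0821) / 0.3807
print(round(z.mean(), 3), round(z.std(), 3))   # close to 0 and 1 if K is right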
self.assertEqual(a.dtype, np.float32) + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + self.assertTrue(-0.01 < mean < 0.01) + self.assertTrue(-0.01 < median < 0.01) + self.assertTrue(0.99 < std < 1.01) + + ht.random.set_state(('Threefry', 54321, 0x10000000000000000)) + b = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2, comm=ht.MPI_WORLD).numpy() + self.assertTrue(np.array_equal(a, b)) + + c = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2, comm=ht.MPI_WORLD).numpy() + self.assertFalse(np.array_equal(a, c)) + self.assertFalse(np.array_equal(b, c)) + def test_set_state(self): ht.random.set_state(('Threefry', 12345, 0xfff)) self.assertEqual(ht.random.get_state(), ('Threefry', 12345, 0xfff, 0, 0.0)) From 9e941129e53947378d5daece5388ab9abc56e18e Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 14:16:36 +0200 Subject: [PATCH 21/24] added one more test for wrong type input --- heat/core/tests/test_random.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 359b9d6241..c4c772cf49 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -107,6 +107,8 @@ def test_rand(self): ht.random.randn(0xffffffffffffffff * 2 + 1, comm=ht.MPI_WORLD) with self.assertRaises(ValueError): ht.random.rand(3, 2, -2, 5, split=1, comm=ht.MPI_WORLD) + with self.assertRaises(ValueError): + ht.random.randn(12, 43, dtype=ht.int32, split=0, comm=ht.MPI_WORLD) # 32 Bit tests ht.random.seed(9876) From a106e4697d8d74821315541e3695538bb932bab1 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 15:24:43 +0200 Subject: [PATCH 22/24] trying to fix threefry with 32 bit --- heat/core/random.py | 95 +++++++++++++++++++--------------- heat/core/tests/test_random.py | 13 ++++- 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index fa0eb4fbb3..d7eb154f7d 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -277,8 +277,13 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): # generate the random sequence if dtype == types.float32: - x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int32, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) x_0, x_1 = __threefry32(x_0, x_1) + mask = 0x7fffffff + x_0 &= mask + x_1 &= mask + x_0 = torch.tensor(x_0, dtype=torch.int32) + x_1 = torch.tensor(x_1, dtype=torch.int32) # combine the values into one tensor and convert them to floats values = __int32_to_float32(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) @@ -490,47 +495,53 @@ def __threefry32(X_0, X_1): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 
16, 2011 """ - samples = len(X_0) - - # Seed is > 32 bit - seed_32 = __seed & 0xffffffff - - # set up key buffer - ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) - ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) - ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) - ks_2 ^= ks_0 - ks_2 ^= ks_0 - - # initialize output using the key - X_0 += ks_0 - X_1 += ks_1 - - # perform rounds - X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 - X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 - X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 - X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 - - # inject key - X_0 += ks_1; X_1 += (ks_2 + 1) - - X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 - X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 - X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 - X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 - - # inject key - X_0 += ks_2; X_1 += (ks_0 + 2) - - X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 - X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 - X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 - X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 - - # inject key - X_0 += ks_0; X_1 += (ks_1 + 3) - + # samples = len(X_0) + # + # # Seed is > 32 bit + # seed_32 = __seed & 0xffffffff + # + # # set up key buffer + # ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) + # ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) + # ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) + # ks_2 ^= ks_0 + # ks_2 ^= ks_0 + # + # # initialize output using the key + # X_0 += ks_0 + # X_1 += ks_1 + # + # # perform rounds + # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 + # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 + # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 + # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 + # + # # inject key + # X_0 += ks_1; X_1 += (ks_2 + 1) + # + # X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 + # X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 + # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 + # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 + # + # # inject key + # X_0 += ks_2; X_1 += (ks_0 + 2) + # + # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + # + # # inject key + # X_0 += ks_0; X_1 += (ks_1 + 3) + + X_0, X_1 = __threefry64(X_0=X_0, X_1=X_1) + mask = 0xffffffff + X_0 &= mask + X_1 &= mask + X_0 = torch.tensor(X_0, dtype=torch.int32) + X_1 = torch.tensor(X_1, dtype=torch.int32) return X_0, X_1 diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index c4c772cf49..eb1ff2ef2a 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -132,7 +132,8 @@ def test_rand(self): _, counts = np.unique(c, return_counts=True) # Values somehow repeat quite often (bad key or shifts?) 
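# On the interim __threefry32 above (run the 64-bit cipher, keep the low word
# of each output): masking with 0xffffffff can leave values beyond 2**31 - 1,
# and torch.tensor(existing_tensor, ...) copy-constructs with a warning. The
# usual spelling, assuming recent torch semantics, is a mask plus .to():
import torch
x = torch.tensor([0x1_0000_0003], dtype=torch.int64)
low = (x & 0xffffffff).to(torch.int32)   # keep low 32 bits, reinterpret signed
print(low)                               # tensor([3], dtype=torch.int32)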
- # self.assertTrue((counts == 1).all()) # TODO fails + print('len', len(counts[np.where(counts!=1)])) + self.assertTrue((counts == 1).all()) # TODO fails # Values should be spread evenly across the range [0, 1) mean = np.mean(c) @@ -155,6 +156,16 @@ def test_rand(self): self.assertFalse(np.array_equal(a, c)) self.assertFalse(np.array_equal(b, c)) + def test_exp(self): + a = ht.random.rand(1000000, dtype=ht.float32) + a = a.numpy() + # import matplotlib.pyplot as plt + # plt.hist(a, bins=100000) + # plt.show() + _, counts = np.unique(a, return_counts=True) + print('counts', len(counts[np.where(counts!=1)])) + self.fail() + def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) From ca8f64a6e8ba45662eb345bcef0a9adf80248a26 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 12 Sep 2019 10:51:33 +0200 Subject: [PATCH 23/24] threefry32 is now done --- heat/core/random.py | 95 +++++++++++++++------------------- heat/core/tests/test_random.py | 15 ------ 2 files changed, 42 insertions(+), 68 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index d7eb154f7d..844e73c021 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -277,13 +277,8 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): # generate the random sequence if dtype == types.float32: - x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int32, split, device, comm) x_0, x_1 = __threefry32(x_0, x_1) - mask = 0x7fffffff - x_0 &= mask - x_1 &= mask - x_0 = torch.tensor(x_0, dtype=torch.int32) - x_1 = torch.tensor(x_1, dtype=torch.int32) # combine the values into one tensor and convert them to floats values = __int32_to_float32(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) @@ -495,53 +490,47 @@ def __threefry32(X_0, X_1): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 
16, 2011 """ - # samples = len(X_0) - # - # # Seed is > 32 bit - # seed_32 = __seed & 0xffffffff - # - # # set up key buffer - # ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) - # ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) - # ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) - # ks_2 ^= ks_0 - # ks_2 ^= ks_0 - # - # # initialize output using the key - # X_0 += ks_0 - # X_1 += ks_1 - # - # # perform rounds - # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 - # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 - # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 - # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 - # - # # inject key - # X_0 += ks_1; X_1 += (ks_2 + 1) - # - # X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 - # X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 - # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 - # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 - # - # # inject key - # X_0 += ks_2; X_1 += (ks_0 + 2) - # - # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 - # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 - # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 - # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 - # - # # inject key - # X_0 += ks_0; X_1 += (ks_1 + 3) - - X_0, X_1 = __threefry64(X_0=X_0, X_1=X_1) - mask = 0xffffffff - X_0 &= mask - X_1 &= mask - X_0 = torch.tensor(X_0, dtype=torch.int32) - X_1 = torch.tensor(X_1, dtype=torch.int32) + samples = len(X_0) + + # Seed is > 32 bit + seed_32 = __seed & 0x7fffffff + + # set up key buffer + ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) + ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) + ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) + ks_2 ^= ks_0 + ks_2 ^= ks_0 + + # initialize output using the key + X_0 += ks_0 + X_1 += ks_1 + + # perform rounds + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 + + # inject key + X_0 += ks_1; X_1 += (ks_2 + 1) + + X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 + X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 + + # inject key + X_0 += ks_2; X_1 += (ks_0 + 2) + + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + + # inject key + X_0 += ks_0; X_1 += (ks_1 + 3) + return X_0, X_1 diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index eb1ff2ef2a..7dd276a686 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -130,11 +130,6 @@ def test_rand(self): b = b.numpy().flatten() c = np.concatenate((a, b)) - _, counts = np.unique(c, return_counts=True) - # Values somehow repeat quite often (bad key or shifts?) 
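# On seed_32 = __seed & 0x7fffffff above: the key words are materialized with
# torch.full(..., dtype=torch.int32), and a full 32-bit mask could produce a
# fill value outside the signed int32 range, so clipping the seed to 31 bits
# (presumably the intent here) keeps it representable:
seed = 0xdeadbeefcafe
key = seed & 0x7fffffff
assert 0 <= key <= 2**31 - 1   # always a valid torch.int32 fill value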
- print('len', len(counts[np.where(counts!=1)])) - self.assertTrue((counts == 1).all()) # TODO fails - # Values should be spread evenly across the range [0, 1) mean = np.mean(c) median = np.median(c) @@ -156,16 +151,6 @@ def test_rand(self): self.assertFalse(np.array_equal(a, c)) self.assertFalse(np.array_equal(b, c)) - def test_exp(self): - a = ht.random.rand(1000000, dtype=ht.float32) - a = a.numpy() - # import matplotlib.pyplot as plt - # plt.hist(a, bins=100000) - # plt.show() - _, counts = np.unique(a, return_counts=True) - print('counts', len(counts[np.where(counts!=1)])) - self.fail() - def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) From e774c95acaa03254d1506dffb45a530195e3f53d Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 12 Sep 2019 12:51:22 +0200 Subject: [PATCH 24/24] set rounds of the threefry algorithm to 8 for both implementations --- heat/core/random.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 844e73c021..b70cce7f55 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -521,12 +521,12 @@ def __threefry32(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 # inject key - X_0 += ks_2; X_1 += (ks_0 + 2) - - X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 - X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 - X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 - X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + # X_0 += ks_2; X_1 += (ks_0 + 2) + # + # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 # inject key X_0 += ks_0; X_1 += (ks_1 + 3) @@ -580,13 +580,10 @@ def __threefry64(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 - # With half of the iterations the "randomness" is already achieved and computation time is halved - - # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 - # X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 - # - # # inject key + # inject key # X_0 += ks_2; X_1 += (ks_0 + 2) # # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9
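For reference, the rotation distances visible in the two ciphers above (rounds 5 through 12 of the 64-bit variant) match the published Threefry-2x64 schedule R = (16, 42, 12, 31, 16, 32, 24, 21). A table-driven round loop makes the 8-versus-12 round tradeoff an explicit knob instead of commented-out lines; a sketch over Python's unbounded ints, with masking emulating 64-bit wraparound and the key injections between groups of four rounds omitted for brevity:

    MASK64 = (1 << 64) - 1
    R = (16, 42, 12, 31, 16, 32, 24, 21)  # Threefry-2x64 rotation distances

    def mix(x0, x1, r):
        x0 = (x0 + x1) & MASK64
        x1 = ((x1 << r) | (x1 >> (64 - r))) & MASK64
        return x0, x0 ^ x1

    def rounds(x0, x1, n=8):
        for i in range(n):
            x0, x1 = mix(x0, x1, R[i % 8])
        return x0, x1

    print(rounds(0, 1))   # deterministic diffusion of a trivial input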