From a38ce5d5970cb3a6960f67f24c386a117bc96f49 Mon Sep 17 00:00:00 2001
From: Markus Goetz
Date: Thu, 6 Jun 2019 18:18:37 +0200
Subject: [PATCH 01/24] Provided get and set state functions, reworked seed
 setting, first threefry function (32bit)

---
 heat/core/random.py | 152 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 133 insertions(+), 19 deletions(-)

diff --git a/heat/core/random.py b/heat/core/random.py
index 284f806943..87d47eff5b 100644
--- a/heat/core/random.py
+++ b/heat/core/random.py
@@ -1,32 +1,32 @@
+import time
 import torch
 
 from . import communication
 from . import devices
 from . import dndarray
-from . import types
 from . import stride_tricks
+from . import types
 
+# introduce the variables, will be correctly initialized at the end of file
+__seed = None
+__counter = None
 
-def set_gseed(seed):
-    # TODO: think about proper random number generation
-    # TODO: comment me
-    # TODO: test me
-    torch.manual_seed(seed)
-
-
-def uniform(low=0.0, high=1.0, size=None, device=None, comm=None):
-    # TODO: comment me
-    # TODO: test me
-    # TODO: make me splitable
-    # TODO: add device capabilities
-    if size is None:
-        size = (1,)
 
-    device = devices.sanitize_device(device)
-    comm = communication.sanitize_comm(comm)
-    data = torch.rand(*size, device=device.torch_device) * (high - low) + low
+def get_state():
+    """
+    Return a tuple representing the internal state of the generator.
 
-    return dndarray.DNDarray(data, size, types.float32, None, device, comm)
+    Returns
+    -------
+    out : tuple(str, int, int, int, float)
+        The returned tuple has the following items:
+        1. the string ‘Threefry’,
+        2. the Threefry key value, aka seed,
+        3. the internal counter value,
+        4. an integer has_gauss, always set to 0 (present for compatibility with numpy) and
+        5. a float cached_gaussian, always set to 0.0 (present for compatibility with numpy).
+    """
+    return 'Threefry', __seed, __counter, 0, 0.0


 def randn(*args, split=None, device=None, comm=None):
@@ -86,3 +86,117 @@ def randn(*args, split=None, device=None, comm=None):
     data = torch.randn(args, device=device.torch_device)
 
     return dndarray.DNDarray(data, gshape, types.canonical_heat_type(data.dtype), split, device, comm)
+
+
+def seed(seed=None):
+    """
+    Seed the generator.
+
+    Parameters
+    ----------
+    seed : int, optional
+        Value to seed the algorithm with; if not set, a time-based seed is generated.
+    """
+    if seed is None:
+        seed = communication.MPI_WORLD.bcast(int(time.time() * 256))
+
+    global __seed, __counter
+    __seed = seed
+    __counter = 0
+    torch.manual_seed(seed)
+
+
+def set_state(state):
+    """
+    Set the internal state of the generator from a tuple.
+
+    Parameters
+    ----------
+    state : tuple(str, int, int, int, float)
+        The passed tuple must have the following items:
+        1. the string ‘Threefry’,
+        2. the Threefry key value, aka seed,
+        3. the internal counter value,
+        4. an optional integer has_gauss, ignored (present for compatibility with numpy) and
+        5. an optional float cached_gaussian, ignored (present for compatibility with numpy).
+
+    Raises
+    ------
+    TypeError
+        If an improper state is passed.
+    ValueError
+        If one of the items in the state tuple is of wrong type or value.
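A minimal usage sketch for this state API (illustrative only; it assumes the module is re-exported
as ht.random, as it is elsewhere in heat):

    import heat as ht

    ht.random.seed(12345)                                 # fixed key, counter reset to 0
    algorithm, key, counter, _, _ = ht.random.get_state()
    ht.random.set_state(('Threefry', key, counter))       # the two numpy-compatibility fields may be omitted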
+ """ + if not isinstance(state, tuple) or (len(state) != 3 and len(state) != 5): + raise TypeError('state needs to be a three- or five-tuple') + + if state[0] != 'Threefry': + raise ValueError('algorithm must be "Threefry"') + + global __seed, __counter + __seed = int(state[1]) + __counter = int(state[2]) + + +def __threefry_32(num_samples): + samples = (num_samples + 1) // 2 + + # set up X, i.e. output buffer + X_0 = t.arange(samples, dtype=t.int32) + X_1 = t.arange(samples, dtype=t.int32) + X_0 //= t.iinfo(t.int32).max + + # set up key buffer + ks_0 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 + ks_1 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 + ks_2 = t.full((samples,), 466688986, dtype=t.int32) + ks_2 ^= ks_0 + ks_2 ^= ks_0 + + # initialize output using the key + X_0 += ks_0 + X_1 += ks_1 + + # perform rounds + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 + + # inject key + X_0 += ks_1; X_1 += (ks_2 + 1) + + X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 + X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 + + # inject key + X_0 += ks_2; X_1 += (ks_0 + 2) + + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + + # inject key + X_0 += ks_0; X_1 += (ks_1 + 3) + + +def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): + # TODO: comment me + # TODO: test me + # TODO: make me splitable + # TODO: add device capabilities + if size is None: + size = (1,) + + device = devices.sanitize_device(device) + comm = communication.sanitize_comm(comm) + data = torch.rand(*size, device=device.torch_device) * (high - low) + low + + return dndarray.DNDarray(data, size, types.float32, None, device, comm) + + +# roll a global time-based seed +seed() From 08e583530b84e52f99bd8ba3531df0bd553087ea Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Thu, 13 Jun 2019 11:12:59 +0200 Subject: [PATCH 02/24] Added threefry64, added intxx to floatxx conversion functions --- heat/core/random.py | 140 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 133 insertions(+), 7 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 87d47eff5b..6721688a0e 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -7,7 +7,7 @@ from . import stride_tricks from . import types -# introduce the variables, will be correctly initialized at the end of file +# introduce the global random state variables, will be correctly initialized at the end of file __seed = None __counter = None @@ -29,6 +29,44 @@ def get_state(): return 'Threefry', __seed, __counter, 0, 0.0 +def __int32_to_float32(values): + """ + Converts a tensor of 32-bit (random) numbers to matching single-precision floating point numbers (equally 32-bit) in + the bounded interval [0.0, 1.0). Extracts the 23 least-significant bits of the integers (0x7fffff) and sets them to + be the mantissa of the floating point number. Interval is bound by dividing by 2^23 = 8388608.0. 
+ + Parameters + ---------- + values : torch.Tensor (int32) + Values to be converted to floating points numbers in interval [0.0, 1.0). + + Returns + ------- + floats : torch.Tensor (float32) + Corresponding single-precision floating point numbers. + """ + return (values & 0x7fffff).type(torch.float32) / 8388608.0 + + +def __int64_to_float64(values): + """ + Converts a tensor of 64-bit (random) numbers to matching double-precision floating point numbers (equally 64-bit) in + the bounded interval [0.0, 1.0). Extracts the 53 least-significant bits of the integers (0x1fffffffffffff) and sets + them to be the mantissa of the floating point number. Interval is bound by dividing by 2^53 = 9007199254740992.0. + + Parameters + ---------- + values : torch.Tensor (int64) + Values to be converted to floating points numbers in interval [0.0, 1.0). + + Returns + ------- + floats : torch.Tensor (float64) + Corresponding single-precision floating point numbers. + """ + return (values & 0x1fffffffffffff).type(torch.float64) / 9007199254740992.0 + + def randn(*args, split=None, device=None, comm=None): """ Returns a tensor filled with random numbers from a standard normal distribution with zero mean and variance of one. @@ -139,17 +177,37 @@ def set_state(state): def __threefry_32(num_samples): + """ + Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the + 32-bit version. + + Parameters + ---------- + num_samples : int + Number of 32-bit pseudo random numbers to be generated. + + Returns + ------- + random_numbers : torch.Tensor (int32) + Vector with num_samples pseudo random numbers. + + References + ---------- + [1] Salmon, John K., Moraes, Mark A., Dror, Ron O. and Shaw, David E., "Parallel random numbers: as easy as 1, 2, 3" + Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, + p. 16, 2011 + """ samples = (num_samples + 1) // 2 # set up X, i.e. output buffer - X_0 = t.arange(samples, dtype=t.int32) - X_1 = t.arange(samples, dtype=t.int32) - X_0 //= t.iinfo(t.int32).max + X_0 = torch.arange(samples, dtype=torch.int32) + X_1 = torch.arange(samples, dtype=torch.int32) + X_0 //= torch.iinfo(torch.int32).max # set up key buffer - ks_0 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 - ks_1 = t.full((samples,), 0, dtype=t.int32) # seed instead of 0 - ks_2 = t.full((samples,), 466688986, dtype=t.int32) + ks_0 = torch.full((samples,), __seed, dtype=torch.int32) + ks_1 = torch.full((samples,), __seed, dtype=torch.int32) + ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) ks_2 ^= ks_0 ks_2 ^= ks_0 @@ -182,6 +240,74 @@ def __threefry_32(num_samples): # inject key X_0 += ks_0; X_1 += (ks_1 + 3) + return X_0, X_1 + + +def __threefry64(num_samples): + """ + Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the + 64-bit version. + + Parameters + ---------- + num_samples : int + Number of 64-bit pseudo random numbers to be generated. + + Returns + ------- + random_numbers : torch.Tensor (int64) + Vector with num_samples pseudo random numbers. + + References + ---------- + [1] Salmon, John K., Moraes, Mark A., Dror, Ron O. and Shaw, David E., "Parallel random numbers: as easy as 1, 2, 3" + Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, + p. 16, 2011 + """ + samples = (num_samples + 1) // 2 + + # set up X, i.e. 
output buffer + X_0 = torch.arange(samples, dtype=torch.int64) + X_1 = torch.arange(samples, dtype=torch.int64) + X_0 //= torch.iinfo(torch.int64).max + + # set up key buffer + ks_0 = torch.full((samples,), __seed, dtype=torch.int64) + ks_1 = torch.full((samples,), __seed, dtype=torch.int64) + ks_2 = torch.full((samples,), 2004413935125273122, dtype=torch.int64) + ks_2 ^= ks_0 + ks_2 ^= ks_0 + + # initialize output using the key + X_0 += ks_0 + X_1 += ks_1 + + # perform rounds + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 1 + X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 2 + X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 3 + X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 4 + # inject key + X_0 += ks_1; X_1 += (ks_2 + 1) + + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 + X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 + + # inject key + X_0 += ks_2; X_1 += (ks_0 + 2) + + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9 + X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 10 + X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 11 + X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 12 + + # inject key + X_0 += ks_0; X_1 += (ks_1 + 3) + + return X_0, X_1 + def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): # TODO: comment me From 52837a5bf4beb18e6e16620b16badc7ef9262852 Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Mon, 17 Jun 2019 13:46:02 +0200 Subject: [PATCH 03/24] Added float conversion sugar, added Kundu random normal transformation --- heat/core/random.py | 54 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 6721688a0e..9a737cd77f 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -7,11 +7,42 @@ from . import stride_tricks from . import types + # introduce the global random state variables, will be correctly initialized at the end of file __seed = None __counter = None +# float conversion constants +__INT32_TO_FLOAT32 = 1.0 / 8388608.0 +__INT64_TO_FLOAT64 = 1.0 / 9007199254740992.0 +__KUNDU_INVERSE = 1.0 / 0.3807 + + +def __kundu_transform(values): + """ + Transforms uniformly distributed floating point random values in the interval [0.0, 1.0) into normal distributed + floating point random values with mean 0.0 and standard deviation 1.0. The algorithm makes use of the generalized + exponential distribution transformation [1]. + + Parameters + ---------- + values : torch.Tensor + A tensor containing uniformly distributed floating point values in the interval [0.0, 1.0). + + Returns + ------- + normal_values : torch.Tensor + A tensor containing the equivalent normally distributed floating point values with mean of 0.0 and standard + deviation of 1.0. + + References + ---------- + [1] Boiroju, N. K. and Reddy, K. M., "Generation of Standard Normal Random Numbers", Interstat, vol 5., 2012. + """ + return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE + + def get_state(): """ Return a tuple representing the internal state of the generator. @@ -45,7 +76,7 @@ def __int32_to_float32(values): floats : torch.Tensor (float32) Corresponding single-precision floating point numbers. 
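The __kundu_transform introduced in this patch can be checked empirically. A standalone sketch using
the same constants (the transform is an approximation, so the moments only come out close to 0 and 1):

    import torch

    u = torch.rand(1_000_000, dtype=torch.float64)
    z = (torch.log(-torch.log(1 - u ** 0.0775)) - 1.0821) * (1.0 / 0.3807)
    print(z.mean().item(), z.std().item())  # both should land near 0 and 1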
""" - return (values & 0x7fffff).type(torch.float32) / 8388608.0 + return (values & 0x7fffff).type(torch.float32) * __INT32_TO_FLOAT32 def __int64_to_float64(values): @@ -64,7 +95,7 @@ def __int64_to_float64(values): floats : torch.Tensor (float64) Corresponding single-precision floating point numbers. """ - return (values & 0x1fffffffffffff).type(torch.float64) / 9007199254740992.0 + return (values & 0x1fffffffffffff).type(torch.float64) * __INT64_TO_FLOAT64 def randn(*args, split=None, device=None, comm=None): @@ -101,6 +132,9 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ + # TODO: FIX ME! + return + # TODO: make me splitable # TODO: add device capabilities # check if all positional arguments are integers @@ -169,7 +203,7 @@ def set_state(state): raise TypeError('state needs to be a three- or five-tuple') if state[0] != 'Threefry': - raise ValueError('algorithm must be "Threefry"') + raise ValueError('algorithm must be \'Threefry\'') global __seed, __counter __seed = int(state[1]) @@ -188,8 +222,8 @@ def __threefry_32(num_samples): Returns ------- - random_numbers : torch.Tensor (int32) - Vector with num_samples pseudo random numbers. + random_numbers : tuple(torch.Tensor (int32)) + Two vectors with num_samples / 2 (rounded-up) pseudo random numbers. References ---------- @@ -200,8 +234,8 @@ def __threefry_32(num_samples): samples = (num_samples + 1) // 2 # set up X, i.e. output buffer - X_0 = torch.arange(samples, dtype=torch.int32) - X_1 = torch.arange(samples, dtype=torch.int32) + X_0 = torch.arange(samples, dtype=torch.int32) + (__counter | 0xffffffff) + X_1 = torch.arange(samples, dtype=torch.int32) + (__counter >> 32) X_0 //= torch.iinfo(torch.int32).max # set up key buffer @@ -255,8 +289,8 @@ def __threefry64(num_samples): Returns ------- - random_numbers : torch.Tensor (int64) - Vector with num_samples pseudo random numbers. + random_numbers : tuple(torch.Tensor (int64)) + Two vectors with num_samples / 2 (rounded-up) pseudo random numbers. References ---------- @@ -310,6 +344,8 @@ def __threefry64(num_samples): def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): + # TODO: FIX ME! + # TODO: comment me # TODO: test me # TODO: make me splitable From b0a2a90ee59afa6d14e797ab8d86cf704470d947 Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Thu, 4 Jul 2019 17:33:05 +0200 Subject: [PATCH 04/24] Broken inbetween state, nothing working yet, but would like to backup changes made --- heat/core/random.py | 268 +++++++++++++++++++++++++++++++------------- 1 file changed, 193 insertions(+), 75 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 9a737cd77f..b23b8a1834 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -1,3 +1,4 @@ +import numpy as np import time import torch @@ -19,28 +20,43 @@ __KUNDU_INVERSE = 1.0 / 0.3807 -def __kundu_transform(values): +def __counter_sequence(shape, dtype, split, device, comm): """ - Transforms uniformly distributed floating point random values in the interval [0.0, 1.0) into normal distributed - floating point random values with mean 0.0 and standard deviation 1.0. The algorithm makes use of the generalized - exponential distribution transformation [1]. Parameters ---------- - values : torch.Tensor - A tensor containing uniformly distributed floating point values in the interval [0.0, 1.0). 
+ shape + dtype + split + device + comm Returns ------- - normal_values : torch.Tensor - A tensor containing the equivalent normally distributed floating point values with mean of 0.0 and standard - deviation of 1.0. - References - ---------- - [1] Boiroju, N. K. and Reddy, K. M., "Generation of Standard Normal Random Numbers", Interstat, vol 5., 2012. """ - return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE + global __counter + + total_elements = np.prod(shape) + + + dimensions = len(shape) + elements_in_higher_dims = 1 + offset, lshape, _ = comm.chunk(shape, split) + ranges = dimensions * [None] + + for i in range(dimensions - 2, -1, -1): + elements_in_dim = lshape[i] + if i != split: + values = torch.arange(elements_in_dim, dtype=dtype, device=device) * elements_in_higher_dims + else: + values = (torch.arange(elements_in_dim, dtype=dtype, device=device) + offset) * elements_in_higher_dims + + values = values.reshape(*[1 if j != i else -1 for j in range(dimensions)]) + ranges[i] = values + elements_in_higher_dims *= elements_in_dim + + return torch.sum(ranges) def get_state(): @@ -98,16 +114,161 @@ def __int64_to_float64(values): return (values & 0x1fffffffffffff).type(torch.float64) * __INT64_TO_FLOAT64 +def __kundu_transform(values): + """ + Transforms uniformly distributed floating point random values in the interval [0.0, 1.0) into normal distributed + floating point random values with mean 0.0 and standard deviation 1.0. The algorithm makes use of the generalized + exponential distribution transformation [1]. + + Parameters + ---------- + values : torch.Tensor + A tensor containing uniformly distributed floating point values in the interval [0.0, 1.0). + + Returns + ------- + normal_values : torch.Tensor + A tensor containing the equivalent normally distributed floating point values with mean of 0.0 and standard + deviation of 1.0. + + References + ---------- + [1] Boiroju, N. K. and Reddy, K. M., "Generation of Standard Normal Random Numbers", Interstat, vol 5., 2012. + """ + return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE + + +def rand(*args, split=None, device=None, comm=None): + """ + Random values in a given shape. + + Create a tensor of the given shape and populate it with random samples from a uniform distribution over [0, 1). + + Parameters + ---------- + d0, d1, …, dn : int, optional + The dimensions of the returned array, should all be positive. If no argument is given a single random samples is + generated. + split: int, optional + The axis along which the array is split and distributed, defaults to None (no distribution). + device : str or None, optional + Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device). + comm: Communication, optional + Handle to the nodes holding distributed parts or copies of this tensor. + + Returns + ------- + out : ndarray, shape (d0, d1, ..., dn) + The uniformly distributed [0.0, 1.0)-bound random values. 
+ """ + # if args are not set, generate a single sample + if not args: + args = (1,) + + # ensure that the passed dimensions are positive integer-likes + shape = tuple(int(ele) for ele in args) + if not all(ele > 0 for ele in shape): + raise ValueError('negative dimensions are not allowed') + + # make sure the remaining parameters are of proper type + split = stride_tricks.sanitize_axis(shape, split) + device = devices.sanitize_device(device) + comm = communication.sanitize_comm(comm) + + # generate the random sequence + x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1 = __threefry64(x_0, x_1) + + # combine the values into one tensor and convert them to floats + values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] + values = __int64_to_float64(values) + + return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + + +def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm=None): + """ + Random values in a given shape. + + Create a tensor of the given shape and populate it with random samples from a uniform distribution over [0, 1). + + Parameters + ---------- + low : int + Lowest (signed) integer to be drawn from the distribution (unless high=None, in which case this parameter is one + above the highest such integer). + high : int, optional + If provided, one above the largest (signed) integer to be drawn from the distribution (see above for behavior if high=None). + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. Default is None, in + which case a single value is returned. + dtype : dtype, optional + Desired dtype of the result. Must be an integer type. Defaults to ht.int64. + split: int, optional + The axis along which the array is split and distributed, defaults to None (no distribution). + device : str or None, optional + Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device). + comm: Communication, optional + Handle to the nodes holding distributed parts or copies of this tensor. + + Returns + ------- + out : ndarray, shape (d0, d1, ..., dn) + The uniformly distributed [0.0, 1.0)-bound random values. 
+ """ + # determine range bounds + if high is None: + low, high = 0, int(low) + else: + low, high = int(low), int(high) + if low >= high: + raise ValueError('low >= high') + span = high - low + 1 + + # sanitize shape + if size is None: + size = (1,) + shape = tuple(int(ele) for ele in size) + if not all(ele > 0 for ele in shape): + raise ValueError('negative dimensions are not allowed') + + # sanitize the data type + if dtype is None: + dtype = types.int64 + dtype = types.canonical_heat_type(dtype) + if dtype is not types.int64 and dtype is not types.int32: + raise ValueError('Unsupported dtype for randint') + + # make sure the remaining parameters are of proper type + split = stride_tricks.sanitize_axis(shape, split) + device = devices.sanitize_device(device) + comm = communication.sanitize_comm(comm) + + # generate the random sequence + x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1 = __threefry64(x_0, x_1) + + # combine the values into one tensor and convert them to floats + values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] + values = __int64_to_float64(values) + + return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + + def randn(*args, split=None, device=None, comm=None): """ Returns a tensor filled with random numbers from a standard normal distribution with zero mean and variance of one. - The shape of the tensor is defined by the varargs args. - Parameters ---------- d0, d1, …, dn : int, optional The dimensions of the returned array, should be all positive. + split: int, optional + The axis along which the array is split and distributed, defaults to None (no distribution). + device : str or None, optional + Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device). + comm: Communication, optional + Handle to the nodes holding distributed parts or copies of this tensor. Returns ------- @@ -132,32 +293,12 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ - # TODO: FIX ME! - return - - # TODO: make me splitable - # TODO: add device capabilities - # check if all positional arguments are integers - if not all(isinstance(_, int) for _ in args): - raise TypeError('dimensions have to be integers') - if not all(_ > 0 for _ in args): - raise ValueError('negative dimension are not allowed') - - gshape = tuple(args) if args else(1,) - split = stride_tricks.sanitize_axis(gshape, split) - - try: - torch.randn(gshape) - except RuntimeError as exception: - # re-raise the exception to be consistent with numpy's exception interface - raise ValueError(str(exception)) - - # compose the local tensor - device = devices.sanitize_device(device) - comm = communication.sanitize_comm(comm) - data = torch.randn(args, device=device.torch_device) + # generate uniformly distributed random numbers first + normal_tensor = rand(*args, split, device, comm) + # convert the the values to a normal distribution using the kundu transform + normal_tensor._DNDarray__array = __kundu_transform(normal_tensor._DNDarray__array) - return dndarray.DNDarray(data, gshape, types.canonical_heat_type(data.dtype), split, device, comm) + return normal_tensor def seed(seed=None): @@ -210,15 +351,17 @@ def set_state(state): __counter = int(state[2]) -def __threefry_32(num_samples): +def __threefry_32(X_0, X_1): """ Counter-based pseudo random number generator. 
Based on a 12-round Threefry "encryption" algorithm [1]. This is the 32-bit version. Parameters ---------- - num_samples : int - Number of 32-bit pseudo random numbers to be generated. + X_0 : torch.Tensor + Upper bits of the to be encoded random sequence + X_1 : torch.Tensor + Lower bits of the to be encoded random sequence Returns ------- @@ -231,12 +374,7 @@ def __threefry_32(num_samples): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 16, 2011 """ - samples = (num_samples + 1) // 2 - - # set up X, i.e. output buffer - X_0 = torch.arange(samples, dtype=torch.int32) + (__counter | 0xffffffff) - X_1 = torch.arange(samples, dtype=torch.int32) + (__counter >> 32) - X_0 //= torch.iinfo(torch.int32).max + samples = len(X_0) # set up key buffer ks_0 = torch.full((samples,), __seed, dtype=torch.int32) @@ -277,15 +415,17 @@ def __threefry_32(num_samples): return X_0, X_1 -def __threefry64(num_samples): +def __threefry64(X_0, X_1): """ Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the 64-bit version. Parameters ---------- - num_samples : int - Number of 64-bit pseudo random numbers to be generated. + X_0 : torch.Tensor + Upper bits of the to be encoded random sequence + X_1 : torch.Tensor + Lower bits of the to be encoded random sequence Returns ------- @@ -298,12 +438,7 @@ def __threefry64(num_samples): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 16, 2011 """ - samples = (num_samples + 1) // 2 - - # set up X, i.e. output buffer - X_0 = torch.arange(samples, dtype=torch.int64) - X_1 = torch.arange(samples, dtype=torch.int64) - X_0 //= torch.iinfo(torch.int64).max + samples = len(X_0) # set up key buffer ks_0 = torch.full((samples,), __seed, dtype=torch.int64) @@ -343,22 +478,5 @@ def __threefry64(num_samples): return X_0, X_1 -def uniform(low=0.0, high=1.0, size=None, device=None, comm=None): - # TODO: FIX ME! - - # TODO: comment me - # TODO: test me - # TODO: make me splitable - # TODO: add device capabilities - if size is None: - size = (1,) - - device = devices.sanitize_device(device) - comm = communication.sanitize_comm(comm) - data = torch.rand(*size, device=device.torch_device) * (high - low) + low - - return dndarray.DNDarray(data, size, types.float32, None, device, comm) - - # roll a global time-based seed seed() From 4e593f838455e237480f7bba07bd2c16fa257dca Mon Sep 17 00:00:00 2001 From: Markus Goetz Date: Wed, 14 Aug 2019 10:29:47 +0200 Subject: [PATCH 05/24] Simon taking over --- heat/core/random.py | 77 +++++++++++++++++++++++++--------- heat/core/tests/test_random.py | 26 +++--------- 2 files changed, 64 insertions(+), 39 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index b23b8a1834..fbfbd37ef5 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -22,27 +22,59 @@ def __counter_sequence(shape, dtype, split, device, comm): """ + Generates a sequence of numbers to be used as the "clear text" for the threefry encryption, i.e. the pseudo random + number generator. Due to the fact that threefry always requires pairs of inputs, the input sequence may not just be + a simple range including the global offset, but rather needs to be to independent vectors, one containing the range + and the other having the interleaved high-bits counter in it. 
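A standalone sketch of the word split this function relies on, for the 64-bit case (the exact mask
handling is refined over the following patches):

    max_count = 0xffffffffffffffff                 # 2 ** 64 - 1
    counter = 0x00000000000000010000000000000003   # example 128-bit generator state
    c_0 = (counter & (max_count << 64)) >> 64      # high word: 1
    c_1 = counter & max_count                      # low word:  3
    # c_1 offsets the per-element range (x_1), c_0 fills the companion high-bits vector (x_0)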
Parameters ---------- - shape - dtype - split - device - comm + shape : tuple of ints + The global shape of the random tensor to be generated. + dtype : torch.dtype + The data type of the elements to be generated. Needs to be either torch.int32 or torch.int64. + split : int or None + The split axis along which the random number tensor is split + device : 'str' + Specifies the device the tensor shall be allocated on. + comm: ht.Communication + Handle to the nodes holding distributed parts or copies of this tensor. Returns ------- - + x_0 : torch.Tensor + The high-bits vector for the threefry encryption. + x_1 : torch.Tensor + The low-bits vector for the threefry encryption. + lshape : tuple of ints + The shape x_0 and x_1 need to be reshaped to after encryption. May be slightly larger than the actual local + portion of the random number tensor due to sequence overlaps of the counter sequence. + slices : list of slices + The indices into the reshaped tensor to obtain the actual local portion. """ + # get the global random state into the function, might want to factor this out into a class later global __counter + # extract the counter state of the random number generator + if dtype is torch.int32: + c_0 = __counter & (0xffffffff << 32) + c_1 = __counter & 0xffffffff + else: # torch.int64 + c_0 = __counter & (0xffffffffffffffff << 64) + c_1 = __counter & 0xffffffffffffffff + + # prepare some reusable values + dimensions = len(shape) total_elements = np.prod(shape) + offset, lshape, _ = comm.chunk(shape, split) + + # generate the x_0 counter sequence + x_0 = torch.full + + # generate the x_1 counter sequence - dimensions = len(shape) elements_in_higher_dims = 1 - offset, lshape, _ = comm.chunk(shape, split) ranges = dimensions * [None] for i in range(dimensions - 2, -1, -1): @@ -56,7 +88,10 @@ def __counter_sequence(shape, dtype, split, device, comm): ranges[i] = values elements_in_higher_dims *= elements_in_dim - return torch.sum(ranges) + # advance the global counter + __counter += total_elements + + return x_0, x_1, lshape, slices def get_state(): @@ -176,12 +211,11 @@ def rand(*args, split=None, device=None, comm=None): comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1, lshape = __counter_sequence(shape, torch.int64, split, device, comm) x_0, x_1 = __threefry64(x_0, x_1) # combine the values into one tensor and convert them to floats - values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] - values = __int64_to_float64(values) + values = __int64_to_float64(torch.stack([x_0, x_1], dim=1)).reshape(lshape) return dndarray.DNDarray(values, shape, types.float64, split, device, comm) @@ -238,6 +272,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm dtype = types.canonical_heat_type(dtype) if dtype is not types.int64 and dtype is not types.int32: raise ValueError('Unsupported dtype for randint') + torch_dtype = dtype.torch_type() # make sure the remaining parameters are of proper type split = stride_tricks.sanitize_axis(shape, split) @@ -245,14 +280,18 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, counter_shape, slices = __counter_sequence(shape, torch.int64, split, device, comm) - x_0, x_1 = __threefry64(x_0, x_1) + x_0, x_1, lshape = __counter_sequence(shape, torch_dtype, split, device, comm) + if 
torch_dtype is torch.int32: + x_0, x_1 = __threefry32(x_0, x_1) + else: + x_0, x_1 = __threefry64(x_0, x_1) - # combine the values into one tensor and convert them to floats - values = torch.stack([x_0, x_1], dim=1).reshape(counter_shape)[slices] - values = __int64_to_float64(values) + # stack the resulting sequence and normalize to given range + values = torch.stack([x_0, x_1], dim=1).reshape(lshape) + # ATTENTION: this is biased and known, bias-free rejection sampling is difficult to do in parallel + values = (values.abs_() % span) + low - return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + return dndarray.DNDarray(values, shape, dtype, split, device, comm) def randn(*args, split=None, device=None, comm=None): @@ -351,7 +390,7 @@ def set_state(state): __counter = int(state[2]) -def __threefry_32(X_0, X_1): +def __threefry32(X_0, X_1): """ Counter-based pseudo random number generator. Based on a 12-round Threefry "encryption" algorithm [1]. This is the 32-bit version. diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index ce93daa8ea..bb03418e4a 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -4,25 +4,11 @@ class TestTensor(unittest.TestCase): - def test_randn(self): - # scalar input - simple_randn_float = ht.random.randn(3) - self.assertIsInstance(simple_randn_float, ht.DNDarray) - self.assertEqual(simple_randn_float.shape, (3,)) - self.assertEqual(simple_randn_float.lshape, (3,)) - self.assertEqual(simple_randn_float.split, None) - self.assertEqual(simple_randn_float.dtype, ht.float32) + def test_rand(self): + pass - # multi-dimensional - elaborate_randn_float = ht.random.randn(2, 3) - self.assertIsInstance(elaborate_randn_float, ht.DNDarray) - self.assertEqual(elaborate_randn_float.shape, (2, 3)) - self.assertEqual(elaborate_randn_float.lshape, (2, 3)) - self.assertEqual(elaborate_randn_float.split, None) - self.assertEqual(elaborate_randn_float.dtype, ht.float32) + def test_randint(self): + pass - # exceptions - with self.assertRaises(TypeError): - ht.random.randn('(2, 3,)') - with self.assertRaises(ValueError): - ht.random.randn(-1, 3) + def test_randn(self): + pass From 8a87c4f1a0040caf3dd24e699afc9fde896e6e9f Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 26 Aug 2019 17:42:51 +0200 Subject: [PATCH 06/24] Implemented the counter_sequenze function and added multiple test cases --- heat/core/random.py | 125 +++++++++++++++++++++++++-------- heat/core/tests/test_random.py | 76 +++++++++++++++++++- 2 files changed, 169 insertions(+), 32 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index fbfbd37ef5..07f28c3c03 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -49,49 +49,112 @@ def __counter_sequence(shape, dtype, split, device, comm): lshape : tuple of ints The shape x_0 and x_1 need to be reshaped to after encryption. May be slightly larger than the actual local portion of the random number tensor due to sequence overlaps of the counter sequence. - slices : list of slices - The indices into the reshaped tensor to obtain the actual local portion. 
+ slice : python slice + The slice that needs to be applied to the resulting random number tensor """ # get the global random state into the function, might want to factor this out into a class later global __counter - + tmp_counter = __counter # Share this initial local state to update it correctly later + rank = comm.Get_rank() + size = comm.Get_size() + max_count = 0xffffffff if dtype == torch.int32 else 0xffffffffffffffff # extract the counter state of the random number generator if dtype is torch.int32: - c_0 = __counter & (0xffffffff << 32) - c_1 = __counter & 0xffffffff + c_0 = (__counter & (max_count << 32)) >> 32 + c_1 = __counter & max_count else: # torch.int64 - c_0 = __counter & (0xffffffffffffffff << 64) - c_1 = __counter & 0xffffffffffffffff + c_0 = (__counter & (max_count << 64)) >> 64 + c_1 = __counter & max_count - # prepare some reusable values - dimensions = len(shape) total_elements = np.prod(shape) - offset, lshape, _ = comm.chunk(shape, split) + if total_elements > 2 * max_count: + raise ValueError('Shape is to big with {} elements'.format(total_elements)) + + if split is None: + values = total_elements / 2 + even_end = values % 2 == 0 + lslice = slice(None) if even_end else slice(None, -1) + start = c_1 + end = start + int(values) + lshape = shape + else: + offset, lshape, _ = comm.chunk(shape, split) + counts, displs, _ = comm.counts_displs_shape(shape, split) + + # Calculate number of local elements per process + local_elements = [total_elements / shape[split] * counts[i] for i in range(size)] + cum_elements = np.cumsum(local_elements) + + # Calculate the correct borders and slices + even_start = True if rank == 0 else cum_elements[rank-1] % 2 == 0 + start = c_1 if rank == 0 else int(cum_elements[rank-1] / 2) + c_1 + elements = local_elements[rank] / 2 + lslice = slice(None) + if even_start: + # No overlap with previous processes + if elements == int(elements): + # Even number of elements + end = int(elements) + else: + # Odd number of elements + end = int(elements) + 1 + lslice = slice(None, -1) + else: + # Overlap with previous processes + if elements == int(elements): + # Even number of elements + end = int(elements) + 1 + lslice = slice(1, -1) + else: + # Odd number of elements + end = int(elements) + 1 + lslice = slice(1, None) + start = int(start) + end += start + + # Check x_1 for overflow + lrange = [start, end] + signed_mask = 0x7fffffff if dtype == torch.int32 else 0x7fffffffffffffff + diff = 0 if lrange[1] <= signed_mask else lrange[1] - signed_mask + lrange[0], lrange[1] = lrange[0] - diff, lrange[1] - diff + + # create x_1 counter sequence + x_1 = torch.arange(*lrange, dtype=dtype) + while diff > signed_mask: + # signed_mask is maximum that can be added at a time because torch does not support unit64 or unit32 + x_1 += signed_mask + diff -= signed_mask + x_1 += diff # generate the x_0 counter sequence - x_0 = torch.full - - # generate the x_1 counter sequence - - - elements_in_higher_dims = 1 - ranges = dimensions * [None] + x_0 = torch.empty_like(x_1) + diff = c_0 - signed_mask + if diff > 0: + # same problem as for x_1 with the overflow + x_0.fill_(signed_mask) + while diff > signed_mask: + x_0 += signed_mask + diff -= signed_mask + x_0 += diff + else: + x_0.fill_(c_0) - for i in range(dimensions - 2, -1, -1): - elements_in_dim = lshape[i] - if i != split: - values = torch.arange(elements_in_dim, dtype=dtype, device=device) * elements_in_higher_dims + # Detect if x_0 needs to be increased for current values + if end > max_count: + if start > 
max_count: + # x_0 changed in previous process, increase all values + x_0 += 1 else: - values = (torch.arange(elements_in_dim, dtype=dtype, device=device) + offset) * elements_in_higher_dims - - values = values.reshape(*[1 if j != i else -1 for j in range(dimensions)]) - ranges[i] = values - elements_in_higher_dims *= elements_in_dim + # x_0 changes after reaching the overflow in this process + x_0[-(end-max_count-1):] += 1 - # advance the global counter - __counter += total_elements + # Correctly increase the counter variable + used_values = int(np.ceil(total_elements / 2)) + # Increase counter but not over 128 bit + tmp_counter += used_values & 0xffffffffffffffffffffffffffffffff # 128bit mask + __counter = tmp_counter - return x_0, x_1, lshape, slices + return x_0, x_1, lshape, lslice def get_state(): @@ -211,11 +274,11 @@ def rand(*args, split=None, device=None, comm=None): comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, lshape = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) x_0, x_1 = __threefry64(x_0, x_1) # combine the values into one tensor and convert them to floats - values = __int64_to_float64(torch.stack([x_0, x_1], dim=1)).reshape(lshape) + values = __int64_to_float64(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) return dndarray.DNDarray(values, shape, types.float64, split, device, comm) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index bb03418e4a..68ebbfe593 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,11 +1,85 @@ import unittest import heat as ht +import numpy as np class TestTensor(unittest.TestCase): def test_rand(self): - pass + # int64 tests + + # Resetting seed works + seed = 12345 + ht.random.seed(seed) + a = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + b = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + self.assertFalse(ht.equal(a, b)) + ht.random.seed(seed) + c = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + self.assertTrue(ht.equal(a, c)) + + # Random numbers with overflow + ht.random.set_state(('Threefry', seed, 0xfffffffffffffff0)) + a = ht.random.rand(2, 3, 4, 5, split=0, comm=ht.MPI_WORLD) + ht.random.set_state(('Threefry', seed, 0x10000000000000000)) + b = ht.random.rand(2, 44, split=0, comm=ht.MPI_WORLD) + a = a.numpy().flatten() + b = b.numpy().flatten() + self.assertTrue(np.array_equal(a[32:], b)) + + # Check that random numbers don't repeat after first overflow + seed = 12345 + ht.random.set_state(('Threefry', seed, 0x10000000000000000)) + a = ht.random.rand(2, 44) + ht.random.seed(seed) + b = ht.random.rand(2, 44) + self.assertFalse(ht.equal(a, b)) + + # Check that we start from beginning after 128 bit overflow + ht.random.seed(seed) + a = ht.random.rand(2, 34, split=0) + ht.random.set_state(('Threefry', seed, 0xfffffffffffffffffffffffffffffff0)) + b = ht.random.rand(2, 50, split=0) + a = a.numpy().flatten() + b = b.numpy(). 
flatten() + self.assertTrue(np.array_equal(a, b[32:])) + + # different split axis with resetting seed + ht.random.seed(seed) + a = ht.random.rand(3, 5, 2, 9, split=3, comm=ht.MPI_WORLD) + ht.random.seed(seed) + c = ht.random.rand(3, 5, 2, 9, split=3, comm=ht.MPI_WORLD) + self.assertTrue(ht.equal(a, c)) + + # Random values are in correct order + ht.random.seed(seed) + a = ht.random.rand(2, 50, split=0) + ht.random.seed(seed) + b = ht.random.rand(100, split=None) + a = a.numpy().flatten() + b = b._DNDarray__array.numpy() + self.assertTrue(np.array_equal(a, b)) + + # On different shape and split the same random values are used + ht.random.seed(seed) + a = ht.random.rand(3, 5, 2, 9, split=3, comm=ht.MPI_WORLD) + ht.random.seed(seed) + b = ht.random.rand(30, 9, split=1, comm=ht.MPI_WORLD) + a = np.sort(a.numpy().flatten()) + b = np.sort(b.numpy().flatten()) + self.assertTrue(np.array_equal(a, b)) + + # One large array does not have two similar values + a = ht.random.rand(11, 15, 3, 7, split=2, comm=ht.MPI_WORLD) + a = a.numpy() + _, counts = np.unique(a, return_counts=True) + self.assertTrue((counts == 1).all()) # Assert that no value appears more than once + + # Two large arrays that were created after each other don't share any values + b = ht.random.rand(14, 7, 3, 12, 18, 42, split=5, comm=ht.MPI_WORLD) + c = np.concatenate((a.flatten(), b.numpy().flatten())) + _, counts = np.unique(c, return_counts=True) + self.assertTrue((counts == 1).all()) def test_randint(self): pass From 3d46337211ca7846df9ddfc932e36caec4ab0cab Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 29 Aug 2019 13:52:38 +0200 Subject: [PATCH 07/24] fixing unit test that broke down because of new random generator --- heat/core/random.py | 8 ++-- heat/core/statistics.py | 9 +++-- heat/core/tests/test_communication.py | 27 +++++++------ heat/core/tests/test_manipulations.py | 16 ++++---- heat/core/tests/test_statistics.py | 56 +++++++++++++-------------- 5 files changed, 59 insertions(+), 57 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 07f28c3c03..9dbab52e67 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -71,8 +71,8 @@ def __counter_sequence(shape, dtype, split, device, comm): raise ValueError('Shape is to big with {} elements'.format(total_elements)) if split is None: - values = total_elements / 2 - even_end = values % 2 == 0 + values = int(total_elements / 2) + even_end = total_elements % 2 == 0 lslice = slice(None) if even_end else slice(None, -1) start = c_1 end = start + int(values) @@ -259,6 +259,7 @@ def rand(*args, split=None, device=None, comm=None): out : ndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. 
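The even_end change in the hunk above fixes a real off-by-one: whether the trailing value has to be
sliced off depends on the parity of the total element count, not on the pair count. A quick illustration:

    total_elements = 6
    values = total_elements / 2     # 3.0 Threefry pairs
    (values % 2) == 0               # False -> the old check would wrongly slice off a needed value
    (total_elements % 2) == 0       # True  -> the trailing value must be kept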
""" + print('args', args) # if args are not set, generate a single sample if not args: args = (1,) @@ -395,8 +396,9 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ + print('args', args) # generate uniformly distributed random numbers first - normal_tensor = rand(*args, split, device, comm) + normal_tensor = rand(*args, split=split, device=device, comm=comm) # convert the the values to a normal distribution using the kundu transform normal_tensor._DNDarray__array = __kundu_transform(normal_tensor._DNDarray__array) diff --git a/heat/core/statistics.py b/heat/core/statistics.py index ba3a214489..ae0d70b2a3 100644 --- a/heat/core/statistics.py +++ b/heat/core/statistics.py @@ -390,6 +390,7 @@ def max(x, axis=None, out=None, keepdim=None): [12.]]) """ def local_max(*args, **kwargs): + print('args', *args) result = torch.max(*args, **kwargs) if isinstance(result, tuple): return result[0] @@ -496,14 +497,14 @@ def maximum(x1, x2, out=None): # locally: apply torch.max(x1, x2) output_lshape = stride_tricks.broadcast_shape(x1.lshape, x2.lshape) - lresult = factories.empty(output_lshape) + lresult = factories.empty(output_lshape, dtype=x1.dtype) lresult._DNDarray__array = torch.max(x1._DNDarray__array, x2._DNDarray__array) lresult._DNDarray__dtype = types.promote_types(x1.dtype, x2.dtype) lresult._DNDarray__split = split if x1.split is not None or x2.split is not None: if x1.comm.is_distributed(): # assuming x1.comm = x2.comm output_gshape = stride_tricks.broadcast_shape(x1.gshape, x2.gshape) - result = factories.empty(output_gshape) + result = factories.empty(output_gshape, dtype=x1.dtype) x1.comm.Allgather(lresult, result) # TODO: adopt Allgatherv() as soon as it is fixed, Issue #233 result._DNDarray__dtype = lresult._DNDarray__dtype @@ -912,14 +913,14 @@ def minimum(x1, x2, out=None): # locally: apply torch.min(x1, x2) output_lshape = stride_tricks.broadcast_shape(x1.lshape, x2.lshape) - lresult = factories.empty(output_lshape) + lresult = factories.empty(output_lshape, dtype=x1.dtype) lresult._DNDarray__array = torch.min(x1._DNDarray__array, x2._DNDarray__array) lresult._DNDarray__dtype = types.promote_types(x1.dtype, x2.dtype) lresult._DNDarray__split = split if x1.split is not None or x2.split is not None: if x1.comm.is_distributed(): # assuming x1.comm = x2.comm output_gshape = stride_tricks.broadcast_shape(x1.gshape, x2.gshape) - result = factories.empty(output_gshape) + result = factories.empty(output_gshape, dtype=x1.dtype) x1.comm.Allgather(lresult, result) # TODO: adopt Allgatherv() as soon as it is fixed, Issue #233 result._DNDarray__dtype = lresult._DNDarray__dtype diff --git a/heat/core/tests/test_communication.py b/heat/core/tests/test_communication.py index f7d85f44dd..118094c1db 100644 --- a/heat/core/tests/test_communication.py +++ b/heat/core/tests/test_communication.py @@ -181,10 +181,10 @@ def test_allgather(self): # check result self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) - self.assertTrue((output._DNDarray__array == torch.ones(ht.MPI_WORLD.size, 7,)).all()) + self.assertTrue((output._DNDarray__array == torch.ones(ht.MPI_WORLD.size, 7)).all()) # contiguous data, different gather axis - data = ht.ones((7, 2,)) + data = ht.ones((7, 2,), dtype=ht.float64) output = ht.random.randn(7, 2 * ht.MPI_WORLD.size) # ensure prior invariants @@ -195,7 +195,7 @@ def test_allgather(self): # check result 
self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) - self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size)).all()) + self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size, dtype=torch.float64)).all()) # non-contiguous data data = ht.ones((4, 5,)).T @@ -825,7 +825,7 @@ def test_iallgather(self): self.assertTrue((output._DNDarray__array == torch.ones(ht.MPI_WORLD.size, 7,)).all()) # contiguous data, different gather axis - data = ht.ones((7, 2,)) + data = ht.ones((7, 2,), dtype=ht.float64) output = ht.random.randn(7, 2 * ht.MPI_WORLD.size) # ensure prior invariants @@ -833,11 +833,10 @@ def test_iallgather(self): self.assertTrue(output._DNDarray__array.is_contiguous()) req = data.comm.Iallgather(data, output, send_axis=1) req.wait() - # check scatter result self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) - self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size)).all()) + self.assertTrue((output._DNDarray__array == torch.ones(7, 2 * ht.MPI_WORLD.size, dtype=torch.float64)).all()) # non-contiguous data data = ht.ones((4, 5,)).T @@ -1288,7 +1287,7 @@ def test_iexscan(self): def test_igather(self): try: # contiguous data - data = ht.ones((1, 5,), dtype=ht.float32) + data = ht.ones((1, 5,), dtype=ht.float64) output = ht.random.randn(ht.MPI_WORLD.size, 5) # ensure prior invariants @@ -1301,10 +1300,10 @@ def test_igather(self): self.assertTrue(data._DNDarray__array.is_contiguous()) self.assertTrue(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: - self.assertTrue((output._DNDarray__array == torch.ones((ht.MPI_WORLD.size, 5,), dtype=torch.float32)).all()) + self.assertTrue((output._DNDarray__array == torch.ones((ht.MPI_WORLD.size, 5,), dtype=torch.float64)).all()) # contiguous data, different gather axis - data = ht.ones((5, 2,), dtype=ht.float32) + data = ht.ones((5, 2,), dtype=ht.float64) output = ht.random.randn(5, 2 * ht.MPI_WORLD.size) # ensure prior invariants @@ -1318,11 +1317,11 @@ def test_igather(self): self.assertTrue(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: self.assertTrue( - (output._DNDarray__array == torch.ones((5, 2 * ht.MPI_WORLD.size,), dtype=torch.float32)).all() + (output._DNDarray__array == torch.ones((5, 2 * ht.MPI_WORLD.size,), dtype=torch.float64)).all() ) # non-contiguous data - data = ht.ones((3, 5,), dtype=ht.float32).T + data = ht.ones((3, 5,), dtype=ht.float64).T output = ht.random.randn(5, 3 * ht.MPI_WORLD.size) # ensure prior invariants @@ -1336,11 +1335,11 @@ def test_igather(self): self.assertTrue(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: self.assertTrue( - (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float32)).all() + (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float64)).all() ) # non-contiguous output, different gather axis - data = ht.ones((5, 3,), dtype=ht.float32) + data = ht.ones((5, 3,), dtype=ht.float64) output = ht.random.randn(3 * ht.MPI_WORLD.size, 5).T # ensure prior invariants @@ -1354,7 +1353,7 @@ def test_igather(self): self.assertFalse(output._DNDarray__array.is_contiguous()) if data.comm.rank == 0: self.assertTrue( - (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float32)).all() + (output._DNDarray__array == torch.ones((5, 3 * ht.MPI_WORLD.size,), dtype=torch.float64)).all() ) # MPI 
implementation may not support asynchronous operations diff --git a/heat/core/tests/test_manipulations.py b/heat/core/tests/test_manipulations.py index d0ecb6e2f8..bfe23bf7b3 100644 --- a/heat/core/tests/test_manipulations.py +++ b/heat/core/tests/test_manipulations.py @@ -526,8 +526,8 @@ def test_squeeze(self): # 4D local tensor, no axis result = ht.squeeze(data) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (4, 5)) self.assertEqual(result.lshape, (4, 5)) self.assertEqual(result.split, None) @@ -536,8 +536,8 @@ def test_squeeze(self): # 4D local tensor, major axis result = ht.squeeze(data, axis=0) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (4, 5, 1)) self.assertEqual(result.lshape, (4, 5, 1)) self.assertEqual(result.split, None) @@ -546,8 +546,8 @@ def test_squeeze(self): # 4D local tensor, minor axis result = ht.squeeze(data, axis=-1) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (1, 4, 5)) self.assertEqual(result.lshape, (1, 4, 5)) self.assertEqual(result.split, None) @@ -556,8 +556,8 @@ def test_squeeze(self): # 4D local tensor, tuple axis result = data.squeeze(axis=(0, -1)) self.assertIsInstance(result, ht.DNDarray) - self.assertEqual(result.dtype, ht.float32) - self.assertEqual(result._DNDarray__array.dtype, torch.float32) + self.assertEqual(result.dtype, ht.float64) + self.assertEqual(result._DNDarray__array.dtype, torch.float64) self.assertEqual(result.shape, (4, 5)) self.assertEqual(result.lshape, (4, 5)) self.assertEqual(result.split, None) diff --git a/heat/core/tests/test_statistics.py b/heat/core/tests/test_statistics.py index d08a418c15..bcf29d2726 100644 --- a/heat/core/tests/test_statistics.py +++ b/heat/core/tests/test_statistics.py @@ -248,8 +248,8 @@ def test_average(self): self.assertIsInstance(avg_volume, ht.DNDarray) self.assertEqual(avg_volume.shape, (3,)) self.assertEqual(avg_volume.lshape[0], random_volume.lshape[0]) - self.assertEqual(avg_volume.dtype, ht.float32) - self.assertEqual(avg_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(avg_volume.dtype, ht.float64) + self.assertEqual(avg_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(avg_volume.split, 0) # check weighted average over all float elements of split 5d tensor, along split axis @@ -261,8 +261,8 @@ def test_average(self): self.assertIsInstance(avg_5d, ht.DNDarray) self.assertEqual(avg_5d.gshape, (size, 3, 4, 5)) self.assertLessEqual(avg_5d.lshape[1], 3) - self.assertEqual(avg_5d.dtype, ht.float32) - self.assertEqual(avg_5d._DNDarray__array.dtype, torch.float32) + self.assertEqual(avg_5d.dtype, ht.float64) + self.assertEqual(avg_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(avg_5d.split, 0) # check exceptions @@ -344,8 +344,8 @@ def test_max(self): self.assertIsInstance(maximum_volume, ht.DNDarray) 
self.assertEqual(maximum_volume.shape, (3, 3)) self.assertEqual(maximum_volume.lshape, (3, 3)) - self.assertEqual(maximum_volume.dtype, ht.float32) - self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume.dtype, ht.float64) + self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, None) # check max over all float elements of split 3d tensor, tuple axis @@ -356,8 +356,8 @@ def test_max(self): self.assertIsInstance(maximum_volume, ht.DNDarray) self.assertEqual(maximum_volume.shape, (3,)) self.assertEqual(maximum_volume.lshape, (3,)) - self.assertEqual(maximum_volume.dtype, ht.float32) - self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume.dtype, ht.float64) + self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, 0) self.assertTrue((maximum_volume == alt_maximum_volume).all()) @@ -368,8 +368,8 @@ def test_max(self): self.assertIsInstance(maximum_5d, ht.DNDarray) self.assertEqual(maximum_5d.shape, (1, 3, 4, 5)) self.assertLessEqual(maximum_5d.lshape[1], 3) - self.assertEqual(maximum_5d.dtype, ht.float32) - self.assertEqual(maximum_5d._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_5d.dtype, ht.float64) + self.assertEqual(maximum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_5d.split, 0) # check exceptions @@ -421,8 +421,8 @@ def test_maximum(self): self.assertIsInstance(maximum_volume, ht.DNDarray) self.assertEqual(maximum_volume.shape, (size * 12, 3, 3)) self.assertEqual(maximum_volume.lshape, (size * 12, 3, 3)) - self.assertEqual(maximum_volume.dtype, ht.float32) - self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume.dtype, ht.float64) + self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, random_volume_1.split) # check maximum over float elements of split 3d tensors with different split axis @@ -433,8 +433,8 @@ def test_maximum(self): self.assertIsInstance(maximum_volume_splitdiff, ht.DNDarray) self.assertEqual(maximum_volume_splitdiff.shape, (size*3, size*3, 4)) self.assertEqual(maximum_volume_splitdiff.lshape, (size*3, size*3, 4)) - self.assertEqual(maximum_volume_splitdiff.dtype, ht.float32) - self.assertEqual(maximum_volume_splitdiff._DNDarray__array.dtype, torch.float32) + self.assertEqual(maximum_volume_splitdiff.dtype, ht.float64) + self.assertEqual(maximum_volume_splitdiff._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume_splitdiff.split, 0) random_volume_1_splitdiff = ht.array(ht.random.randn(size*3, size*3, 4), split=1) @@ -459,8 +459,8 @@ def test_maximum(self): self.assertIsInstance(output, ht.DNDarray) self.assertEqual(output.shape, (ht.MPI_WORLD.size * 12, 3, 3)) self.assertEqual(output.lshape, (ht.MPI_WORLD.size * 12, 3, 3)) - self.assertEqual(output.dtype, ht.float32) - self.assertEqual(output._DNDarray__array.dtype, torch.float32) + self.assertEqual(output.dtype, ht.float64) + self.assertEqual(output._DNDarray__array.dtype, torch.float64) self.assertEqual(output.split, random_volume_1.split) # check exceptions @@ -593,8 +593,8 @@ def test_min(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (3, 3)) self.assertEqual(minimum_volume.lshape, (3, 3)) - self.assertEqual(minimum_volume.dtype, ht.float32) - 
self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume.dtype, ht.float64) + self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, None) # check min over all float elements of split 3d tensor, tuple axis @@ -605,8 +605,8 @@ def test_min(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (3,)) self.assertEqual(minimum_volume.lshape, (3,)) - self.assertEqual(minimum_volume.dtype, ht.float32) - self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume.dtype, ht.float64) + self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, 0) self.assertTrue((minimum_volume == alt_minimum_volume).all()) @@ -617,8 +617,8 @@ def test_min(self): self.assertIsInstance(minimum_5d, ht.DNDarray) self.assertEqual(minimum_5d.shape, (1, 3, 4, 5)) self.assertLessEqual(minimum_5d.lshape[1], 3) - self.assertEqual(minimum_5d.dtype, ht.float32) - self.assertEqual(minimum_5d._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_5d.dtype, ht.float64) + self.assertEqual(minimum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_5d.split, 0) # check exceptions @@ -670,8 +670,8 @@ def test_minimum(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (size * 12, 3, 3)) self.assertEqual(minimum_volume.lshape, (size * 12, 3, 3)) - self.assertEqual(minimum_volume.dtype, ht.float32) - self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume.dtype, ht.float64) + self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, random_volume_1.split) # check minimum over float elements of split 3d tensors with different split axis @@ -682,8 +682,8 @@ def test_minimum(self): self.assertIsInstance(minimum_volume_splitdiff, ht.DNDarray) self.assertEqual(minimum_volume_splitdiff.shape, (size*3, size*3, 4)) self.assertEqual(minimum_volume_splitdiff.lshape, (size*3, size*3, 4)) - self.assertEqual(minimum_volume_splitdiff.dtype, ht.float32) - self.assertEqual(minimum_volume_splitdiff._DNDarray__array.dtype, torch.float32) + self.assertEqual(minimum_volume_splitdiff.dtype, ht.float64) + self.assertEqual(minimum_volume_splitdiff._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume_splitdiff.split, 0) random_volume_1_splitdiff = ht.array(ht.random.randn(size*3, size*3, 4), split=1) @@ -708,8 +708,8 @@ def test_minimum(self): self.assertIsInstance(output, ht.DNDarray) self.assertEqual(output.shape, (ht.MPI_WORLD.size * 12, 3, 3)) self.assertEqual(output.lshape, (ht.MPI_WORLD.size * 12, 3, 3)) - self.assertEqual(output.dtype, ht.float32) - self.assertEqual(output._DNDarray__array.dtype, torch.float32) + self.assertEqual(output.dtype, ht.float64) + self.assertEqual(output._DNDarray__array.dtype, torch.float64) self.assertEqual(output.split, random_volume_1.split) # check exceptions From 526c029a5ab5156b055710464433cac4431b63b3 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 29 Aug 2019 15:18:56 +0200 Subject: [PATCH 08/24] fixed a bug in random --- heat/core/operations.py | 18 +++++++++++++----- heat/core/random.py | 2 +- heat/core/statistics.py | 1 - 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/heat/core/operations.py index 81fd9cfb59..4e2e177e67 100644 ---
a/heat/core/operations.py +++ b/heat/core/operations.py @@ -216,17 +216,25 @@ def __reduce_op(x, partial_op, reduction_op, **kwargs): partial = x._DNDarray__array output_shape = x.gshape for dim in axis: - partial = partial_op(partial, dim=dim, keepdim=True) - output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] + if 0 not in partial.shape: + partial = partial_op(partial, dim=dim, keepdim=True) + output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] + else: + output_shape = output_shape[:dim] + (0, ) + output_shape[dim + 1:] + print('output', output_shape) if not keepdim and not len(partial.shape) == 1: gshape_losedim = tuple(x.gshape[dim] for dim in range(len(x.gshape)) if dim not in axis) lshape_losedim = tuple(x.lshape[dim] for dim in range(len(x.lshape)) if dim not in axis) + if 0 in partial.shape: + lshape_losedim = (0, ) output_shape = gshape_losedim + print('output_shape', lshape_losedim) # Take care of special cases argmin and argmax: keep partial.shape[0] - if (0 in axis and partial.shape[0] != 1): + if 0 in axis and partial.shape[0] != 1: lshape_losedim = (partial.shape[0],) + lshape_losedim - if (not 0 in axis and partial.shape[0] != x.lshape[0]): + if 0 not in axis and partial.shape[0] != x.lshape[0]: lshape_losedim = (partial.shape[0],) + lshape_losedim[1:] + print('output_shape', lshape_losedim) partial = partial.reshape(lshape_losedim) # Check shape of output buffer, if any @@ -241,7 +249,7 @@ def __reduce_op(x, partial_op, reduction_op, **kwargs): # if reduction_op is a Boolean operation, then resulting tensor is bool boolean_ops = [MPI.LAND, MPI.LOR, MPI.BAND, MPI.BOR] - tensor_type = bool if reduction_op in boolean_ops else partial[0].dtype + tensor_type = bool if reduction_op in boolean_ops else partial.dtype if out is not None: out._DNDarray__array = partial diff --git a/heat/core/random.py b/heat/core/random.py index 9dbab52e67..0e98d00258 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -71,7 +71,7 @@ def __counter_sequence(shape, dtype, split, device, comm): raise ValueError('Shape is to big with {} elements'.format(total_elements)) if split is None: - values = int(total_elements / 2) + values = np.ceil(total_elements / 2) even_end = total_elements % 2 == 0 lslice = slice(None) if even_end else slice(None, -1) start = c_1 diff --git a/heat/core/statistics.py b/heat/core/statistics.py index ae0d70b2a3..ab1b73b6e9 100644 --- a/heat/core/statistics.py +++ b/heat/core/statistics.py @@ -390,7 +390,6 @@ def max(x, axis=None, out=None, keepdim=None): [12.]]) """ def local_max(*args, **kwargs): - print('args', *args) result = torch.max(*args, **kwargs) if isinstance(result, tuple): return result[0] From f53311d214b72a4445617148ea005395d65ba912 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 Sep 2019 14:02:43 +0200 Subject: [PATCH 09/24] fixed the reduce function max and min --- heat/core/operations.py | 12 +----- heat/core/statistics.py | 64 ++++++++++++++++++++++++++++-- heat/core/tests/test_statistics.py | 28 +++++++++---- 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/heat/core/operations.py b/heat/core/operations.py index 4e2e177e67..d57c62b83e 100644 --- a/heat/core/operations.py +++ b/heat/core/operations.py @@ -216,25 +216,17 @@ def __reduce_op(x, partial_op, reduction_op, **kwargs): partial = x._DNDarray__array output_shape = x.gshape for dim in axis: - if 0 not in partial.shape: - partial = partial_op(partial, dim=dim, keepdim=True) - output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] - else: - 
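# A quick check on the int() -> np.ceil() change in the random.py hunk of
# patch 08 above: every Threefry call yields a pair of values, so an odd total
# needs one extra call; plain integer division comes up one pair short. This
# assumes nothing beyond the arithmetic itself:
import math
total = 7
assert int(total / 2) * 2 < total         # 3 calls -> 6 values, one missing
assert math.ceil(total / 2) * 2 >= total  # 4 calls -> 8 values, trim the spare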
output_shape = output_shape[:dim] + (0, ) + output_shape[dim + 1:] - print('output', output_shape) + partial = partial_op(partial, dim=dim, keepdim=True) + output_shape = output_shape[:dim] + (1,) + output_shape[dim + 1:] if not keepdim and not len(partial.shape) == 1: gshape_losedim = tuple(x.gshape[dim] for dim in range(len(x.gshape)) if dim not in axis) lshape_losedim = tuple(x.lshape[dim] for dim in range(len(x.lshape)) if dim not in axis) - if 0 in partial.shape: - lshape_losedim = (0, ) output_shape = gshape_losedim - print('output_shape', lshape_losedim) # Take care of special cases argmin and argmax: keep partial.shape[0] if 0 in axis and partial.shape[0] != 1: lshape_losedim = (partial.shape[0],) + lshape_losedim if 0 not in axis and partial.shape[0] != x.lshape[0]: lshape_losedim = (partial.shape[0],) + lshape_losedim[1:] - print('output_shape', lshape_losedim) partial = partial.reshape(lshape_losedim) # Check shape of output buffer, if any diff --git a/heat/core/statistics.py b/heat/core/statistics.py index ab1b73b6e9..19287d7bfa 100644 --- a/heat/core/statistics.py +++ b/heat/core/statistics.py @@ -390,9 +390,37 @@ def max(x, axis=None, out=None, keepdim=None): [12.]]) """ def local_max(*args, **kwargs): - result = torch.max(*args, **kwargs) + array = args[0] + dim = kwargs.get('dim') + if 0 in array.shape: + # Empty local vector would throw an error in the torch max function + if dim == x.split or (dim is None and x.split == 0): + # No distributed result + out_shape = list(array.shape) + empty_dim = next(i for i, d in enumerate(array.shape) if d == 0) + out_shape[empty_dim] = 1 + + # Lowest possible value should be neutral to the max function + if array.dtype is torch.int8: + fill_value = -(1 << 7) + elif array.dtype is torch.int16: + fill_value = -(1 << 15) + elif array.dtype is torch.int32: + fill_value = -(1 << 31) + elif array.dtype is torch.int64: + fill_value = -(1 << 63) + else: + fill_value = float('-inf') + + # Create a local result with a "neutral" value that should not affect the global result + result = torch.empty(out_shape, dtype=array.dtype).fill_(fill_value) + else: + # Distributed result: return an empty tensor as the local result + result = torch.empty_like(array) + else: + result = torch.max(*args, **kwargs) if isinstance(result, tuple): - return result[0] + result = result[0] return result return operations.__reduce_op(x, local_max, MPI.MAX, axis=axis, out=out, keepdim=keepdim) @@ -806,9 +834,37 @@ def min(x, axis=None, out=None, keepdim=None): """ def local_min(*args, **kwargs): - result = torch.min(*args, **kwargs) + array = args[0] + dim = kwargs.get('dim') + if 0 in array.shape: + # Empty local vector would throw an error in the torch min function + if dim == x.split or (dim is None and x.split == 0): + # No distributed result + out_shape = list(array.shape) + empty_dim = next(i for i, d in enumerate(array.shape) if d == 0) + out_shape[empty_dim] = 1 + + # Highest possible value should be neutral to the min function + if array.dtype is torch.int8: + fill_value = (1 << 7) - 1 + elif array.dtype is torch.int16: + fill_value = (1 << 15) - 1 + elif array.dtype is torch.int32: + fill_value = (1 << 31) - 1 + elif array.dtype is torch.int64: + fill_value = (1 << 63) - 1 + else: + fill_value = float('inf') + + # Create a local result with a "neutral" value that should not affect the global result + result = torch.empty(out_shape, dtype=array.dtype).fill_(fill_value) + else: + # Distributed result: return an empty tensor as the local result + result = 
torch.empty_like(array) + else: + result = torch.min(*args, **kwargs) if isinstance(result, tuple): - return result[0] + result = result[0] return result return operations.__reduce_op(x, local_min, MPI.MIN, axis=axis, out=out, keepdim=keepdim) diff --git a/heat/core/tests/test_statistics.py b/heat/core/tests/test_statistics.py index bcf29d2726..6bace2ad7d 100644 --- a/heat/core/tests/test_statistics.py +++ b/heat/core/tests/test_statistics.py @@ -43,16 +43,16 @@ def test_argmax(self): self.assertTrue((result._DNDarray__array == torch.tensor([19]))) # 2D split tensor, along the axis - torch.manual_seed(1) data = ht.array(ht.random.randn(4, 5), is_split=0) result = ht.argmax(data, axis=1) + expected = torch.argmax(data._DNDarray__array, dim=1) self.assertIsInstance(result, ht.DNDarray) self.assertEqual(result.dtype, ht.int64) self.assertEqual(result._DNDarray__array.dtype, torch.int64) self.assertEqual(result.shape, (ht.MPI_WORLD.size * 4,)) self.assertEqual(result.lshape, (4,)) self.assertEqual(result.split, 0) - self.assertTrue((result._DNDarray__array == torch.tensor([4, 4, 2, 4])).all()) + self.assertTrue((result._DNDarray__array == expected).all()) # 2D split tensor, across the axis size = ht.MPI_WORLD.size * 2 @@ -127,16 +127,16 @@ def test_argmin(self): self.assertTrue((result._DNDarray__array == data._DNDarray__array.argmin(-1, keepdim=True)).all()) # 2D split tensor, along the axis - torch.manual_seed(1) data = ht.array(ht.random.randn(4, 5), is_split=0) result = ht.argmin(data, axis=1) + expected = torch.argmin(data._DNDarray__array, dim=1) self.assertIsInstance(result, ht.DNDarray) self.assertEqual(result.dtype, ht.int64) self.assertEqual(result._DNDarray__array.dtype, torch.int64) self.assertEqual(result.shape, (ht.MPI_WORLD.size * 4,)) self.assertEqual(result.lshape, (4,)) self.assertEqual(result.split, 0) - self.assertTrue((result._DNDarray__array == torch.tensor([3, 1, 1, 3])).all()) + self.assertTrue((result._DNDarray__array == expected).all()) # 2D split tensor, across the axis size = ht.MPI_WORLD.size * 2 @@ -290,8 +290,6 @@ def test_average(self): with self.assertRaises(ValueError): ht.average(ht_array, axis=-4) - - def test_max(self): data = [ [1, 2, 3], @@ -355,7 +353,6 @@ def test_max(self): self.assertIsInstance(maximum_volume, ht.DNDarray) self.assertEqual(maximum_volume.shape, (3,)) - self.assertEqual(maximum_volume.lshape, (3,)) self.assertEqual(maximum_volume.dtype, ht.float64) self.assertEqual(maximum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_volume.split, 0) @@ -372,6 +369,14 @@ def test_max(self): self.assertEqual(maximum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(maximum_5d.split, 0) + # Calculating max with empty local vectors works + size = ht.MPI_WORLD.size + if size > 1: + a = ht.arange(size - 1, split=0) + res = ht.max(a) + expected = torch.tensor([size - 2], dtype=a.dtype.torch_type()) + self.assertTrue(torch.equal(res._DNDarray__array, expected)) + # check exceptions with self.assertRaises(TypeError): ht_array.max(axis=1.1) @@ -604,7 +609,6 @@ def test_min(self): self.assertIsInstance(minimum_volume, ht.DNDarray) self.assertEqual(minimum_volume.shape, (3,)) - self.assertEqual(minimum_volume.lshape, (3,)) self.assertEqual(minimum_volume.dtype, ht.float64) self.assertEqual(minimum_volume._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_volume.split, 0) @@ -621,6 +625,14 @@ def test_min(self): self.assertEqual(minimum_5d._DNDarray__array.dtype, torch.float64) self.assertEqual(minimum_5d.split, 0) + 
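# A standalone sketch of the fill-value idea in local_min/local_max above:
# ranks whose shard is empty contribute the reduction's identity element, so
# torch never reduces an empty tensor and MPI.MIN / MPI.MAX simply ignore the
# placeholder. Hypothetical helper, not part of the patch:
import torch

def min_identity(dtype):
    # the largest representable value loses against any real element under MIN
    return float('inf') if dtype.is_floating_point else torch.iinfo(dtype).max

empty = torch.empty(0, dtype=torch.int32)
assert min(min_identity(empty.dtype), 5) == 5   # neutral w.r.t. real data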
# Calculating min with empty local vectors works + size = ht.MPI_WORLD.size + if size > 1: + a = ht.arange(size - 1, split=0) + res = ht.min(a) + expected = torch.tensor([0], dtype=a.dtype.torch_type()) + self.assertTrue(torch.equal(res._DNDarray__array, expected)) + # check exceptions with self.assertRaises(TypeError): ht_array.min(axis=1.1) From 3d086489362c1c42dc04e733c61352a03e9d1f18 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 Sep 2019 14:33:31 +0200 Subject: [PATCH 10/24] fixed the kmeans setup to fit the new random module --- heat/ml/cluster/kmeans.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/heat/ml/cluster/kmeans.py b/heat/ml/cluster/kmeans.py index 4f22518d5b..7befa802d2 100644 --- a/heat/ml/cluster/kmeans.py +++ b/heat/ml/cluster/kmeans.py @@ -17,8 +17,11 @@ def initialize_centroids(k, dimensions, seed, device): # TODO: document me # TODO: extend me with further initialization methods # zero-centered uniform random distribution in [-1, 1] - ht.random.set_gseed(seed) - return ht.random.uniform(low=-1.0, high=1.0, size=(1, dimensions, k), device=device) + ht.random.seed(seed) + rands = ht.random.rand((1, dimensions, k), device=device) + # change the range of the values from [0, 1) to [-1, 1) + rands = rands * 2 - 1 + return rands def fit(self, data): # TODO: document me From d33f743bb1981a11c8b7b0693b8b58d8d597b971 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 2 Sep 2019 14:37:16 +0200 Subject: [PATCH 11/24] unit tests now running in kmeans --- heat/core/manipulations.py | 2 -- heat/core/random.py | 2 -- heat/ml/cluster/kmeans.py | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/heat/core/manipulations.py b/heat/core/manipulations.py index 2830695f7e..029b8527d7 100644 --- a/heat/core/manipulations.py +++ b/heat/core/manipulations.py @@ -568,8 +568,6 @@ def sort(a, axis=None, descending=False, out=None): second_result[idx_slice] = r_val second_indices[idx_slice] = r_ind - # print('second_result', second_result, 'tmp_indices', second_indices) - second_result, tmp_indices = second_result.sort(dim=0, descending=descending) final_result = second_result.transpose(0, axis) final_indices = torch.empty_like(second_indices) diff --git a/heat/core/random.py b/heat/core/random.py index 0e98d00258..9a768ccddb 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -259,7 +259,6 @@ def rand(*args, split=None, device=None, comm=None): out : ndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. 
""" - print('args', args) # if args are not set, generate a single sample if not args: args = (1,) @@ -396,7 +395,6 @@ def randn(*args, split=None, device=None, comm=None): [ 1.3365, -1.5212, 1.4159, -0.1671], [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ - print('args', args) # generate uniformly distributed random numbers first normal_tensor = rand(*args, split=split, device=device, comm=comm) # convert the the values to a normal distribution using the kundu transform diff --git a/heat/ml/cluster/kmeans.py b/heat/ml/cluster/kmeans.py index 7befa802d2..a97982cb0a 100644 --- a/heat/ml/cluster/kmeans.py +++ b/heat/ml/cluster/kmeans.py @@ -18,7 +18,7 @@ def initialize_centroids(k, dimensions, seed, device): # TODO: extend me with further initialization methods # zero-centered uniform random distribution in [-1, 1] ht.random.seed(seed) - rands = ht.random.rand((1, dimensions, k), device=device) + rands = ht.random.rand(1, dimensions, k, device=device) # change the range of the values from [0, 1) to [-1, 1) rands = rands * 2 - 1 return rands From 2d511a5546deaeff5d767131e45623085c1c143c Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 07:14:44 +0200 Subject: [PATCH 12/24] reduced the number of iterations for the threefry algorithm --- heat/core/random.py | 18 +++++++++--------- heat/core/tests/test_random.py | 20 ++++++++++++++++++-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 9a768ccddb..5dc98f39c1 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -564,15 +564,15 @@ def __threefry64(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 - X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 - - # inject key - X_0 += ks_2; X_1 += (ks_0 + 2) - - X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9 - X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 10 - X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 11 - X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 12 + # X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 + # + # # inject key + # X_0 += ks_2; X_1 += (ks_0 + 2) + # + # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9 + # X_0 += X_1; X_1 = (X_1 << 42) | (X_1 >> 22); X_1 ^= X_0 # round 10 + # X_0 += X_1; X_1 = (X_1 << 12) | (X_1 >> 52); X_1 ^= X_0 # round 11 + # X_0 += X_1; X_1 = (X_1 << 31) | (X_1 >> 33); X_1 ^= X_0 # round 12 # inject key X_0 += ks_0; X_1 += (ks_1 + 3) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 68ebbfe593..1932067f98 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,7 +1,9 @@ +import time import unittest import heat as ht import numpy as np +import matplotlib.pyplot as plt class TestTensor(unittest.TestCase): @@ -82,7 +84,21 @@ def test_rand(self): self.assertTrue((counts == 1).all()) def test_randint(self): - pass + a = ht.random.rand(1000, 1000) + b = a.numpy() + plt.imshow(b) + plt.gray() + plt.show() def test_randn(self): - pass + t1 = time.time() + a = ht.random.rand(1000, 1000, split=1) + t2 = time.time() + print('time taken', t2-t1) + self.fail() + + def test_read(self): + a = np.load('../../../all_rounds.npy') + plt.imshow(a) + plt.gray() + plt.show() \ No newline at end of file From e9d2f9b5eacaf69f4c7f58d88b13c03667861b1a Mon Sep 17 00:00:00 2001 From: 
simon Date: Fri, 6 Sep 2019 11:18:41 +0200 Subject: [PATCH 13/24] Fixed the randn and randint functions and added test cases for both of them --- heat/core/manipulation.py | 0 heat/core/random.py | 14 +++-- heat/core/tests/test_random.py | 109 ++++++++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 22 deletions(-) delete mode 100644 heat/core/manipulation.py diff --git a/heat/core/manipulation.py b/heat/core/manipulation.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/heat/core/random.py b/heat/core/random.py index 5dc98f39c1..87246a0b22 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -320,7 +320,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm low, high = int(low), int(high) if low >= high: raise ValueError('low >= high') - span = high - low + 1 + span = high - low # sanitize shape if size is None: @@ -333,7 +333,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm if dtype is None: dtype = types.int64 dtype = types.canonical_heat_type(dtype) - if dtype is not types.int64 and dtype is not types.int32: + if dtype not in [types.int64, types.int32]: raise ValueError('Unsupported dtype for randint') torch_dtype = dtype.torch_type() @@ -341,16 +341,15 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm split = stride_tricks.sanitize_axis(shape, split) device = devices.sanitize_device(device) comm = communication.sanitize_comm(comm) - # generate the random sequence - x_0, x_1, lshape = __counter_sequence(shape, torch_dtype, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, dtype.torch_type(), split, device, comm) if torch_dtype is torch.int32: x_0, x_1 = __threefry32(x_0, x_1) else: x_0, x_1 = __threefry64(x_0, x_1) # stack the resulting sequence and normalize to given range - values = torch.stack([x_0, x_1], dim=1).reshape(lshape) + values = torch.stack([x_0, x_1], dim=1).flatten()[lslice].reshape(lshape) # ATTENTION: this is biased and known, bias-free rejection sampling is difficult to do in parallel values = (values.abs_() % span) + low @@ -563,7 +562,10 @@ def __threefry64(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 - X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 + + # With half of the iterations the "randomness" is already achieved and computation time is halved + + # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 # X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 # # # inject key diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 1932067f98..f91c8c03f0 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -83,22 +83,99 @@ def test_rand(self): _, counts = np.unique(c, return_counts=True) self.assertTrue((counts == 1).all()) + # Values should be spread evenly across the range [0, 1) + mean = np.mean(c) + median = np.median(c) + std = np.std(c) + self.assertTrue(0.49 < mean < 0.51) + self.assertTrue(0.49 < median < 0.51) + self.assertTrue(std < 0.3) + self.assertTrue(((0 <= c) & (c < 1)).all()) + def test_randint(self): - a = ht.random.rand(1000, 1000) - b = a.numpy() - plt.imshow(b) - plt.gray() - plt.show() + # Checked that the random values are in the correct range + a = ht.random.randint(low=0, high=10, size=(10, 10)) + a = a.numpy() + self.assertTrue(((0 <= a) & (a < 10)).all()) + + a = 
ht.random.randint(low=100000, high=150000, size=(31, 25, 11), split=2) + a = a.numpy() + self.assertTrue(((100000 <= a) & (a < 150000)).all()) + + # For the range [0, 1) only the value 0 is allowed + a = ht.random.randint(1, size=(10, ), split=0) + b = ht.zeros((10, ), dtype=ht.int64, split=0) + self.assertTrue(ht.equal(a, b)) + + # Two arrays with the same seed and same number of elements have the same random values + ht.random.seed(13579) + shape = (15, 13, 9, 21, 65) + a = ht.random.randint(15, 100, size=shape, split=0) + a = a.numpy().flatten() + + ht.random.seed(13579) + elements = np.prod(shape) + b = ht.random.randint(low=15, high=100, size=(elements, )) + b = b.numpy() + self.assertTrue(np.array_equal(a, b)) + + # Two arrays with the same seed and shape have identical values + ht.random.seed(13579) + a = ht.random.randint(10000, size=shape, split=2) + a = a.numpy() + + ht.random.seed(13579) + b = ht.random.randint(low=0, high=10000, size=shape, split=2) + b = b.numpy() + + self.assertTrue(np.array_equal(a, b)) + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + + # Mean and median should be in the center while the std is very high due to an even distribution + self.assertTrue(4900 < mean < 5100) + self.assertTrue(4900 < median < 5100) + self.assertTrue(std < 2900) def test_randn(self): - t1 = time.time() - a = ht.random.rand(1000, 1000, split=1) - t2 = time.time() - print('time taken', t2-t1) - self.fail() - - def test_read(self): - a = np.load('../../../all_rounds.npy') - plt.imshow(a) - plt.gray() - plt.show() \ No newline at end of file + # Test that the random values have the correct distribution + ht.random.seed(54321) + shape = (5, 10, 13, 23, 15, 20) + a = ht.random.randn(*shape, split=0) + a = a.numpy() + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + self.assertTrue(-0.01 < mean < 0.01) + self.assertTrue(-0.01 < median < 0.01) + self.assertTrue(0.99 < std < 1.01) + + # Compare to a second array with a different shape but same number of elements and same seed + ht.random.seed(54321) + elements = np.prod(shape) + b = ht.random.randn(elements, split=0) + b = b.numpy() + a = a.flatten() + self.assertTrue(np.array_equal(a, b)) + + # Creating the same array two times without resetting seed results in different elements + c = ht.random.randn(elements, split=0) + c = c.numpy() + self.assertEqual(c.shape, b.shape) + self.assertFalse(np.array_equal(b, c)) + + # All the created values should be different + d = np.concatenate((b, c)) + _, counts = np.unique(d, return_counts=True) + self.assertTrue((counts == 1).all()) + + # Two arrays are the same for same seed and split-axis != 0 + ht.random.seed(12345) + a = ht.random.randn(*shape, split=5) + ht.random.seed(12345) + b = ht.random.randn(*shape, split=5) + self.assertTrue(ht.equal(a, b)) + a = a.numpy() + b = b.numpy() + self.assertTrue(np.array_equal(a, b)) From 2d54f2508a8f1019245efde601d1f6f1d870dfb1 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 12:31:55 +0200 Subject: [PATCH 14/24] removed unnecessary imports --- heat/core/tests/test_random.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index f91c8c03f0..7f99d47ef7 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,9 +1,7 @@ -import time import unittest import heat as ht import numpy as np -import matplotlib.pyplot as plt class TestTensor(unittest.TestCase): From 0d6d0e6c80cd3cb298e9c9c316dc1c5cee8310af Mon Sep 17 00:00:00 2001 From: 
simon Date: Fri, 6 Sep 2019 15:14:32 +0200 Subject: [PATCH 15/24] added more negative test cases --- heat/core/tests/test_random.py | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 7f99d47ef7..aa00d0beb5 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -90,6 +90,19 @@ def test_rand(self): self.assertTrue(std < 0.3) self.assertTrue(((0 <= c) & (c < 1)).all()) + # No arguments work correctly + ht.random.seed(seed) + a = ht.random.rand() + ht.random.seed(seed) + b = ht.random.rand(1) + self.assertTrue(ht.equal(a, b)) + + # To big arrays cant be created + with self.assertRaises(ValueError): + ht.random.randn(0xffffffffffffffff * 2 + 1, comm=ht.MPI_WORLD) + with self.assertRaises(ValueError): + ht.random.rand(3, 2, -2, 5, split=1, comm=ht.MPI_WORLD) + def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) @@ -126,6 +139,10 @@ def test_randint(self): b = ht.random.randint(low=0, high=10000, size=shape, split=2) b = b.numpy() + ht.random.seed(13579) + c = ht.random.randint(low=0, high=10000) + self.assertTrue(np.equal(b[0, 0, 0, 0, 0], c)) + self.assertTrue(np.array_equal(a, b)) mean = np.mean(a) median = np.median(a) @@ -136,6 +153,13 @@ def test_randint(self): self.assertTrue(4900 < median < 5100) self.assertTrue(std < 2900) + with self.assertRaises(ValueError): + ht.random.randint(5, 5, size=(10, 10), split=0) + with self.assertRaises(ValueError): + ht.random.randint(low=0, high=10, size=(3, -4)) + with self.assertRaises(ValueError): + ht.random.randint(low=0, high=10, size=(15, ), dtype=ht.float32) + def test_randn(self): # Test that the random values have the correct distribution ht.random.seed(54321) @@ -177,3 +201,16 @@ def test_randn(self): a = a.numpy() b = b.numpy() self.assertTrue(np.array_equal(a, b)) + + def test_set_state(self): + ht.random.set_state(('Threefry', 12345, 0xfff)) + self.assertEqual(ht.random.get_state(), ('Threefry', 12345, 0xfff, 0, 0.0)) + + ht.random.set_state(('Threefry', 55555, 0xffffffffffffff, 'for', 'compatibility')) + self.assertEqual(ht.random.get_state(), ('Threefry', 55555, 0xffffffffffffff, 0, 0.0)) + + with self.assertRaises(ValueError): + ht.random.set_state(('Thrfry', 12, 0xf)) + with self.assertRaises(ValueError): + ht.random.set_state(('Threefry', 12345)) + From 8b46d135f9739ca83a1643f6b1ef1bc2b192c9ed Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 15:15:16 +0200 Subject: [PATCH 16/24] fixed a bug --- heat/core/tests/test_random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index aa00d0beb5..1b2e361c05 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -211,6 +211,6 @@ def test_set_state(self): with self.assertRaises(ValueError): ht.random.set_state(('Thrfry', 12, 0xf)) - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): ht.random.set_state(('Threefry', 12345)) From 2ad7aa937ead63af0ab5ddca8eb8be16ae95146d Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 6 Sep 2019 15:18:12 +0200 Subject: [PATCH 17/24] renewed the function description --- heat/core/random.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/heat/core/random.py b/heat/core/random.py index 87246a0b22..c948bb126d 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -67,6 +67,7 @@ def 
__counter_sequence(shape, dtype, split, device, comm): c_1 = __counter & max_count total_elements = np.prod(shape) + print('total', hex(total_elements), 'max', hex(2*max_count)) if total_elements > 2 * max_count: raise ValueError('Shape is to big with {} elements'.format(total_elements)) @@ -287,7 +288,8 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm """ Random values in a given shape. - Create a tensor of the given shape and populate it with random samples from a uniform distribution over [0, 1). + Create a tensor of the given shape and populate it with random integer samples from a uniform distribution over + [low, high) or [0, low) if high is not provided. Parameters ---------- From 1471d901d0bab5d1082baa0205f6cd084684111f Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 13:23:26 +0200 Subject: [PATCH 18/24] implemented rand for float32 --- heat/core/random.py | 34 ++++++++++++++++--------- heat/core/tests/test_random.py | 45 +++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 12 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index c948bb126d..1c9f49310c 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -67,7 +67,6 @@ def __counter_sequence(shape, dtype, split, device, comm): c_1 = __counter & max_count total_elements = np.prod(shape) - print('total', hex(total_elements), 'max', hex(2*max_count)) if total_elements > 2 * max_count: raise ValueError('Shape is to big with {} elements'.format(total_elements)) @@ -152,8 +151,8 @@ def __counter_sequence(shape, dtype, split, device, comm): # Correctly increase the counter variable used_values = int(np.ceil(total_elements / 2)) # Increase counter but not over 128 bit - tmp_counter += used_values & 0xffffffffffffffffffffffffffffffff # 128bit mask - __counter = tmp_counter + tmp_counter += used_values + __counter = tmp_counter & 0xffffffffffffffffffffffffffffffff # 128bit mask return x_0, x_1, lshape, lslice @@ -237,7 +236,7 @@ def __kundu_transform(values): return (torch.log(-torch.log(1 - values ** 0.0775)) - 1.0821) * __KUNDU_INVERSE -def rand(*args, split=None, device=None, comm=None): +def rand(*args, dtype=types.float64, split=None, device=None, comm=None): """ Random values in a given shape. 
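A note on the counter bookkeeping fixed in the hunk above: the generator state advances by one tick per emitted pair of values and wraps at 128 bits, so reaching the end of the counter space rolls the stream over instead of growing the state unboundedly. A minimal sketch of that arithmetic, with illustrative names rather than the module's own:

    import numpy as np

    def advance(counter, total_elements):
        used = int(np.ceil(total_elements / 2))     # one tick per output pair
        return (counter + used) & ((1 << 128) - 1)  # wrap at 128 bits

    assert advance((1 << 128) - 1, 2) == 0          # overflow rolls over to zero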
@@ -275,13 +274,23 @@ def rand(*args, split=None, device=None, comm=None): comm = communication.sanitize_comm(comm) # generate the random sequence - x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) - x_0, x_1 = __threefry64(x_0, x_1) + if dtype == types.float32: + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int32, split, device, comm) + x_0, x_1 = __threefry32(x_0, x_1) + + # combine the values into one tensor and convert them to floats + values = __int32_to_float32(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) + elif dtype == types.float64: + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1 = __threefry64(x_0, x_1) - # combine the values into one tensor and convert them to floats - values = __int64_to_float64(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) + # combine the values into one tensor and convert them to floats + values = __int64_to_float64(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) + else: + # Unsupported type + raise ValueError('dtype is none of ht.float32 or ht.float64 but was {}'.format(dtype)) - return dndarray.DNDarray(values, shape, types.float64, split, device, comm) + return dndarray.DNDarray(values, shape, dtype, split, device, comm) def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm=None): @@ -479,9 +488,12 @@ def __threefry32(X_0, X_1): """ samples = len(X_0) + # Seed is > 32 bit + seed_32 = __seed & 0xffffffff + # set up key buffer - ks_0 = torch.full((samples,), __seed, dtype=torch.int32) - ks_1 = torch.full((samples,), __seed, dtype=torch.int32) + ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) + ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) ks_2 ^= ks_0 ks_2 ^= ks_0 diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 1b2e361c05..4f3c121944 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -4,7 +4,7 @@ import numpy as np -class TestTensor(unittest.TestCase): +class TestRandom(unittest.TestCase): def test_rand(self): # int64 tests @@ -103,6 +103,49 @@ def test_rand(self): with self.assertRaises(ValueError): ht.random.rand(3, 2, -2, 5, split=1, comm=ht.MPI_WORLD) + # 32 Bit tests + ht.random.seed(9876) + shape = (13, 43, 13, 23) + a = ht.random.rand(*shape, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) + self.assertEqual(a.dtype, ht.float32) + + ht.random.seed(9876) + b = ht.random.rand(np.prod(shape), dtype=ht.float32, comm=ht.MPI_WORLD) + a = a.numpy().flatten() + b = b._DNDarray__array.numpy() + self.assertTrue(np.array_equal(a, b)) + + a = ht.random.rand(21, 16, 17, 21, dtype=ht.float32, split=2, comm=ht.MPI_WORLD) + b = ht.random.rand(15, 11, 19, 31, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) + a = a.numpy().flatten() + b = b.numpy().flatten() + c = np.concatenate((a, b)) + + _, counts = np.unique(c, return_counts=True) + # Values somehow repeat quite often (bad key or shifts?) 
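# One concrete suspect for the repeats noted above: on signed integer tensors,
# '>>' is an arithmetic (sign-extending) shift, so (x << r) | (x >> (32 - r))
# is only a true rotation while x is non-negative. A reference rotation over
# unsigned 32-bit words, using Python's unbounded ints with explicit masking:
def rotl32(x, r):
    x &= 0xffffffff
    return ((x << r) | (x >> (32 - r))) & 0xffffffff

assert rotl32(0x80000001, 1) == 0x00000003  # the high bit wraps around cleanly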
+ # self.assertTrue((counts == 1).all()) # TODO fails + + # Values should be spread evenly across the range [0, 1) + mean = np.mean(c) + median = np.median(c) + std = np.std(c) + self.assertTrue(0.49 < mean < 0.51) + self.assertTrue(0.49 < median < 0.51) + self.assertTrue(std < 0.3) + self.assertTrue(((0 <= c) & (c < 1)).all()) + + ht.random.seed(11111) + a = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32, comm=ht.MPI_WORLD).numpy() + # Overflow reached + ht.random.set_state(('Threefry', 11111, 0x10000000000000000)) + b = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32, comm=ht.MPI_WORLD).numpy() + self.assertTrue(np.array_equal(a, b)) + + ht.random.set_state(('Threefry', 11111, 0x100000000)) + c = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32, comm=ht.MPI_WORLD).numpy() + self.assertFalse(np.array_equal(a, c)) + self.assertFalse(np.array_equal(b, c)) + def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) From 47a781e47a5e2b63cff77e34f4bf56dff857c468 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 13:42:51 +0200 Subject: [PATCH 19/24] added test cases for randint with int32 --- heat/core/random.py | 2 +- heat/core/tests/test_random.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/heat/core/random.py b/heat/core/random.py index 1c9f49310c..f6e941d80f 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -356,7 +356,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm x_0, x_1, lshape, lslice = __counter_sequence(shape, dtype.torch_type(), split, device, comm) if torch_dtype is torch.int32: x_0, x_1 = __threefry32(x_0, x_1) - else: + else: # torch.int64 x_0, x_1 = __threefry64(x_0, x_1) # stack the resulting sequence and normalize to given range diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 4f3c121944..d87e7715b2 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -203,6 +203,38 @@ def test_randint(self): with self.assertRaises(ValueError): ht.random.randint(low=0, high=10, size=(15, ), dtype=ht.float32) + # int32 tests + ht.random.seed(4545) + a = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) + ht.random.set_state(('Threefry', 4545, 0x10000000000000000)) + b = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) + + self.assertEqual(a.dtype, ht.int32) + self.assertEqual(b.dtype, ht.int32) + a = a.numpy() + b = b.numpy() + self.assertTrue(np.array_equal(a, b)) + self.assertTrue(((50 <= a) & (a < 1000)).all()) + self.assertTrue(((50 <= b) & (b < 1000)).all()) + + c = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) + c = c.numpy() + self.assertFalse(np.array_equal(a, c)) + self.assertFalse(np.array_equal(b, c)) + self.assertTrue(((50 <= c) & (c < 1000)).all()) + + ht.random.seed(0xfffffff) + a = ht.random.randint(10000, size=(123, 42, 13, 21), split=3, dtype=ht.int32, comm=ht.MPI_WORLD) + a = a.numpy() + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + + # Mean and median should be in the center while the std is very high due to an even distribution + self.assertTrue(4900 < mean < 5100) + self.assertTrue(4900 < median < 5100) + self.assertTrue(std < 2900) + def test_randn(self): # Test that the random values have the correct distribution ht.random.seed(54321) From 
4f14b43a289b060bb0b7791a349d53e6c6b03122 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 14:05:03 +0200 Subject: [PATCH 20/24] added tests for randn with float32 --- heat/core/random.py | 16 ++++++++++------ heat/core/tests/test_random.py | 35 +++++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index f6e941d80f..fa0eb4fbb3 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -247,6 +247,8 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): d0, d1, …, dn : int, optional The dimensions of the returned array, should all be positive. If no argument is given a single random samples is generated. + dtype: ht.types, optional + The datatype of the returned values. Has to be one of [ht.float32, ht.float64]. Default is ht.float64. split: int, optional The axis along which the array is split and distributed, defaults to None (no distribution). device : str or None, optional @@ -256,7 +258,7 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): Returns ------- - out : ndarray, shape (d0, d1, ..., dn) + out : ht.dndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. """ # if args are not set, generate a single sample @@ -321,7 +323,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm Returns ------- - out : ndarray, shape (d0, d1, ..., dn) + out : ht.dndarray, shape (d0, d1, ..., dn) The uniformly distributed [0.0, 1.0)-bound random values. """ # determine range bounds @@ -367,7 +369,7 @@ def randint(low, high=None, size=None, dtype=None, split=None, device=None, comm return dndarray.DNDarray(values, shape, dtype, split, device, comm) -def randn(*args, split=None, device=None, comm=None): +def randn(*args, dtype=types.float64, split=None, device=None, comm=None): """ Returns a tensor filled with random numbers from a standard normal distribution with zero mean and variance of one. @@ -375,6 +377,8 @@ def randn(*args, split=None, device=None, comm=None): ---------- d0, d1, …, dn : int, optional The dimensions of the returned array, should be all positive. + dtype: ht.types, optional + The datatype of the returned values. Has to be one of [ht.float32, ht.float64]. Default is ht.float64. split: int, optional The axis along which the array is split and distributed, defaults to None (no distribution). device : str or None, optional @@ -384,8 +388,8 @@ def randn(*args, split=None, device=None, comm=None): Returns ------- - broadcast_shape : tuple of ints - the broadcast shape + out : ht.dndarray, shape (d0, d1, ..., dn) + The normal distributed random values. 
Raises ------- @@ -406,7 +410,7 @@ def randn(*args, split=None, device=None, comm=None): [ 0.1260, 1.2126, -0.0804, 0.0907]]) """ # generate uniformly distributed random numbers first - normal_tensor = rand(*args, split=split, device=device, comm=comm) + normal_tensor = rand(*args, dtype=dtype, split=split, device=device, comm=comm) # convert the the values to a normal distribution using the kundu transform normal_tensor._DNDarray__array = __kundu_transform(normal_tensor._DNDarray__array) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index d87e7715b2..359b9d6241 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -1,5 +1,7 @@ import unittest +import torch + import heat as ht import numpy as np @@ -12,10 +14,12 @@ def test_rand(self): seed = 12345 ht.random.seed(seed) a = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + self.assertEqual(a.dtype, ht.float64) + self.assertEqual(a._DNDarray__array.dtype, torch.float64) b = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) self.assertFalse(ht.equal(a, b)) ht.random.seed(seed) - c = ht.random.rand(2, 5, 7, 3, split=0, comm=ht.MPI_WORLD) + c = ht.random.rand(2, 5, 7, 3, dtype=ht.float64, split=0, comm=ht.MPI_WORLD) self.assertTrue(ht.equal(a, c)) # Random numbers with overflow @@ -25,6 +29,7 @@ def test_rand(self): b = ht.random.rand(2, 44, split=0, comm=ht.MPI_WORLD) a = a.numpy().flatten() b = b.numpy().flatten() + self.assertEqual(a.dtype, np.float64) self.assertTrue(np.array_equal(a[32:], b)) # Check that random numbers don't repeat after first overflow @@ -108,12 +113,14 @@ def test_rand(self): shape = (13, 43, 13, 23) a = ht.random.rand(*shape, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) self.assertEqual(a.dtype, ht.float32) + self.assertEqual(a._DNDarray__array.dtype, torch.float32) ht.random.seed(9876) b = ht.random.rand(np.prod(shape), dtype=ht.float32, comm=ht.MPI_WORLD) a = a.numpy().flatten() b = b._DNDarray__array.numpy() self.assertTrue(np.array_equal(a, b)) + self.assertEqual(a.dtype, np.float32) a = ht.random.rand(21, 16, 17, 21, dtype=ht.float32, split=2, comm=ht.MPI_WORLD) b = ht.random.rand(15, 11, 19, 31, dtype=ht.float32, split=0, comm=ht.MPI_WORLD) @@ -149,6 +156,7 @@ def test_rand(self): def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) + self.assertEqual(a.dtype, ht.int64) a = a.numpy() self.assertTrue(((0 <= a) & (a < 10)).all()) @@ -210,9 +218,11 @@ def test_randint(self): b = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0, comm=ht.MPI_WORLD) self.assertEqual(a.dtype, ht.int32) + self.assertEqual(a._DNDarray__array.dtype, torch.int32) self.assertEqual(b.dtype, ht.int32) a = a.numpy() b = b.numpy() + self.assertEqual(a.dtype, np.int32) self.assertTrue(np.array_equal(a, b)) self.assertTrue(((50 <= a) & (a < 1000)).all()) self.assertTrue(((50 <= b) & (b < 1000)).all()) @@ -240,6 +250,7 @@ def test_randn(self): ht.random.seed(54321) shape = (5, 10, 13, 23, 15, 20) a = ht.random.randn(*shape, split=0) + self.assertEqual(a.dtype, ht.float64) a = a.numpy() mean = np.mean(a) median = np.median(a) @@ -277,6 +288,28 @@ def test_randn(self): b = b.numpy() self.assertTrue(np.array_equal(a, b)) + # Tests with float32 + ht.random.seed(54321) + a = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2, comm=ht.MPI_WORLD) + self.assertEqual(a.dtype, ht.float32) + self.assertEqual(a._DNDarray__array[0, 0, 0].dtype, torch.float32) + a = a.numpy() + 
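# Why the mean/std assertions in these tests are plausible: randn pushes the
# uniform draws through the Kundu approximation of the normal quantile quoted
# earlier, z = (log(-log(1 - u**0.0775)) - 1.0821) * K. A quick empirical
# check, with K = 1 / 0.3807 assumed here (the scale constant itself is not
# shown in this excerpt):
import numpy as np
u = np.random.default_rng(0).random(100_000)
z = (np.log(-np.log(1 - u ** 0.0775)) - 1.0821) / 0.3807
print(round(z.mean(), 3), round(z.std(), 3))   # close to 0 and 1 if K is right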
self.assertEqual(a.dtype, np.float32) + mean = np.mean(a) + median = np.median(a) + std = np.std(a) + self.assertTrue(-0.01 < mean < 0.01) + self.assertTrue(-0.01 < median < 0.01) + self.assertTrue(0.99 < std < 1.01) + + ht.random.set_state(('Threefry', 54321, 0x10000000000000000)) + b = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2, comm=ht.MPI_WORLD).numpy() + self.assertTrue(np.array_equal(a, b)) + + c = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2, comm=ht.MPI_WORLD).numpy() + self.assertFalse(np.array_equal(a, c)) + self.assertFalse(np.array_equal(b, c)) + def test_set_state(self): ht.random.set_state(('Threefry', 12345, 0xfff)) self.assertEqual(ht.random.get_state(), ('Threefry', 12345, 0xfff, 0, 0.0)) From 9e941129e53947378d5daece5388ab9abc56e18e Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 14:16:36 +0200 Subject: [PATCH 21/24] added one more test for wrong type input --- heat/core/tests/test_random.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index 359b9d6241..c4c772cf49 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -107,6 +107,8 @@ def test_rand(self): ht.random.randn(0xffffffffffffffff * 2 + 1, comm=ht.MPI_WORLD) with self.assertRaises(ValueError): ht.random.rand(3, 2, -2, 5, split=1, comm=ht.MPI_WORLD) + with self.assertRaises(ValueError): + ht.random.randn(12, 43, dtype=ht.int32, split=0, comm=ht.MPI_WORLD) # 32 Bit tests ht.random.seed(9876) From a106e4697d8d74821315541e3695538bb932bab1 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 9 Sep 2019 15:24:43 +0200 Subject: [PATCH 22/24] trying to fix threefry with 32 bit --- heat/core/random.py | 95 +++++++++++++++++++--------------- heat/core/tests/test_random.py | 13 ++++- 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index fa0eb4fbb3..d7eb154f7d 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -277,8 +277,13 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): # generate the random sequence if dtype == types.float32: - x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int32, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) x_0, x_1 = __threefry32(x_0, x_1) + mask = 0x7fffffff + x_0 &= mask + x_1 &= mask + x_0 = torch.tensor(x_0, dtype=torch.int32) + x_1 = torch.tensor(x_1, dtype=torch.int32) # combine the values into one tensor and convert them to floats values = __int32_to_float32(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) @@ -490,47 +495,53 @@ def __threefry32(X_0, X_1): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 
16, 2011 """ - samples = len(X_0) - - # Seed is > 32 bit - seed_32 = __seed & 0xffffffff - - # set up key buffer - ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) - ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) - ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) - ks_2 ^= ks_0 - ks_2 ^= ks_0 - - # initialize output using the key - X_0 += ks_0 - X_1 += ks_1 - - # perform rounds - X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 - X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 - X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 - X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 - - # inject key - X_0 += ks_1; X_1 += (ks_2 + 1) - - X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 - X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 - X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 - X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 - - # inject key - X_0 += ks_2; X_1 += (ks_0 + 2) - - X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 - X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 - X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 - X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 - - # inject key - X_0 += ks_0; X_1 += (ks_1 + 3) - + # samples = len(X_0) + # + # # Seed is > 32 bit + # seed_32 = __seed & 0xffffffff + # + # # set up key buffer + # ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) + # ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) + # ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) + # ks_2 ^= ks_0 + # ks_2 ^= ks_0 + # + # # initialize output using the key + # X_0 += ks_0 + # X_1 += ks_1 + # + # # perform rounds + # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 + # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 + # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 + # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 + # + # # inject key + # X_0 += ks_1; X_1 += (ks_2 + 1) + # + # X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 + # X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 + # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 + # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 + # + # # inject key + # X_0 += ks_2; X_1 += (ks_0 + 2) + # + # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + # + # # inject key + # X_0 += ks_0; X_1 += (ks_1 + 3) + + X_0, X_1 = __threefry64(X_0=X_0, X_1=X_1) + mask = 0xffffffff + X_0 &= mask + X_1 &= mask + X_0 = torch.tensor(X_0, dtype=torch.int32) + X_1 = torch.tensor(X_1, dtype=torch.int32) return X_0, X_1 diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index c4c772cf49..eb1ff2ef2a 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -132,7 +132,8 @@ def test_rand(self): _, counts = np.unique(c, return_counts=True) # Values somehow repeat quite often (bad key or shifts?) 
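# On the interim __threefry32 above (run the 64-bit cipher, keep the low word
# of each output): masking with 0xffffffff can leave values beyond 2**31 - 1,
# and torch.tensor(existing_tensor, ...) copy-constructs with a warning. The
# usual spelling, assuming recent torch semantics, is a mask plus .to():
import torch
x = torch.tensor([0x1_0000_0003], dtype=torch.int64)
low = (x & 0xffffffff).to(torch.int32)   # keep low 32 bits, reinterpret signed
print(low)                               # tensor([3], dtype=torch.int32)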
- # self.assertTrue((counts == 1).all()) # TODO fails + print('len', len(counts[np.where(counts!=1)])) + self.assertTrue((counts == 1).all()) # TODO fails # Values should be spread evenly across the range [0, 1) mean = np.mean(c) @@ -155,6 +156,16 @@ def test_rand(self): self.assertFalse(np.array_equal(a, c)) self.assertFalse(np.array_equal(b, c)) + def test_exp(self): + a = ht.random.rand(1000000, dtype=ht.float32) + a = a.numpy() + # import matplotlib.pyplot as plt + # plt.hist(a, bins=100000) + # plt.show() + _, counts = np.unique(a, return_counts=True) + print('counts', len(counts[np.where(counts!=1)])) + self.fail() + def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) From ca8f64a6e8ba45662eb345bcef0a9adf80248a26 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 12 Sep 2019 10:51:33 +0200 Subject: [PATCH 23/24] threefry32 is now done --- heat/core/random.py | 95 +++++++++++++++------------------- heat/core/tests/test_random.py | 15 ------ 2 files changed, 42 insertions(+), 68 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index d7eb154f7d..844e73c021 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -277,13 +277,8 @@ def rand(*args, dtype=types.float64, split=None, device=None, comm=None): # generate the random sequence if dtype == types.float32: - x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int64, split, device, comm) + x_0, x_1, lshape, lslice = __counter_sequence(shape, torch.int32, split, device, comm) x_0, x_1 = __threefry32(x_0, x_1) - mask = 0x7fffffff - x_0 &= mask - x_1 &= mask - x_0 = torch.tensor(x_0, dtype=torch.int32) - x_1 = torch.tensor(x_1, dtype=torch.int32) # combine the values into one tensor and convert them to floats values = __int32_to_float32(torch.stack([x_0, x_1], dim=1).flatten()[lslice]).reshape(lshape) @@ -495,53 +490,47 @@ def __threefry32(X_0, X_1): Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, p. 
16, 2011 """ - # samples = len(X_0) - # - # # Seed is > 32 bit - # seed_32 = __seed & 0xffffffff - # - # # set up key buffer - # ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) - # ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) - # ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) - # ks_2 ^= ks_0 - # ks_2 ^= ks_0 - # - # # initialize output using the key - # X_0 += ks_0 - # X_1 += ks_1 - # - # # perform rounds - # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 - # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 - # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 - # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 - # - # # inject key - # X_0 += ks_1; X_1 += (ks_2 + 1) - # - # X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 - # X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 - # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 - # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 - # - # # inject key - # X_0 += ks_2; X_1 += (ks_0 + 2) - # - # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 - # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 - # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 - # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 - # - # # inject key - # X_0 += ks_0; X_1 += (ks_1 + 3) - - X_0, X_1 = __threefry64(X_0=X_0, X_1=X_1) - mask = 0xffffffff - X_0 &= mask - X_1 &= mask - X_0 = torch.tensor(X_0, dtype=torch.int32) - X_1 = torch.tensor(X_1, dtype=torch.int32) + samples = len(X_0) + + # Seed is > 32 bit + seed_32 = __seed & 0x7fffffff + + # set up key buffer + ks_0 = torch.full((samples,), seed_32, dtype=torch.int32) + ks_1 = torch.full((samples,), seed_32, dtype=torch.int32) + ks_2 = torch.full((samples,), 466688986, dtype=torch.int32) + ks_2 ^= ks_0 + ks_2 ^= ks_0 + + # initialize output using the key + X_0 += ks_0 + X_1 += ks_1 + + # perform rounds + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 1 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 2 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 3 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 4 + + # inject key + X_0 += ks_1; X_1 += (ks_2 + 1) + + X_0 += X_1; X_1 = (X_1 << 17) | (X_1 >> 15); X_1 ^= X_0 # round 5 + X_0 += X_1; X_1 = (X_1 << 29) | (X_1 >> 3); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 16); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 + + # inject key + X_0 += ks_2; X_1 += (ks_0 + 2) + + X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + + # inject key + X_0 += ks_0; X_1 += (ks_1 + 3) + return X_0, X_1 diff --git a/heat/core/tests/test_random.py b/heat/core/tests/test_random.py index eb1ff2ef2a..7dd276a686 100644 --- a/heat/core/tests/test_random.py +++ b/heat/core/tests/test_random.py @@ -130,11 +130,6 @@ def test_rand(self): b = b.numpy().flatten() c = np.concatenate((a, b)) - _, counts = np.unique(c, return_counts=True) - # Values somehow repeat quite often (bad key or shifts?) 
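# On seed_32 = __seed & 0x7fffffff above: the key words are materialized with
# torch.full(..., dtype=torch.int32), and a full 32-bit mask could produce a
# fill value outside the signed int32 range, so clipping the seed to 31 bits
# (presumably the intent here) keeps it representable:
seed = 0xdeadbeefcafe
key = seed & 0x7fffffff
assert 0 <= key <= 2**31 - 1   # always a valid torch.int32 fill value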
- print('len', len(counts[np.where(counts!=1)])) - self.assertTrue((counts == 1).all()) # TODO fails - # Values should be spread evenly across the range [0, 1) mean = np.mean(c) median = np.median(c) @@ -156,16 +151,6 @@ def test_rand(self): self.assertFalse(np.array_equal(a, c)) self.assertFalse(np.array_equal(b, c)) - def test_exp(self): - a = ht.random.rand(1000000, dtype=ht.float32) - a = a.numpy() - # import matplotlib.pyplot as plt - # plt.hist(a, bins=100000) - # plt.show() - _, counts = np.unique(a, return_counts=True) - print('counts', len(counts[np.where(counts!=1)])) - self.fail() - def test_randint(self): # Checked that the random values are in the correct range a = ht.random.randint(low=0, high=10, size=(10, 10)) From e774c95acaa03254d1506dffb45a530195e3f53d Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 12 Sep 2019 12:51:22 +0200 Subject: [PATCH 24/24] set rounds of the threefry algorithm to 8 for both implementations --- heat/core/random.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/heat/core/random.py b/heat/core/random.py index 844e73c021..b70cce7f55 100644 --- a/heat/core/random.py +++ b/heat/core/random.py @@ -521,12 +521,12 @@ def __threefry32(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 8); X_1 ^= X_0 # round 8 # inject key - X_0 += ks_2; X_1 += (ks_0 + 2) - - X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 - X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 - X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 - X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 + # X_0 += ks_2; X_1 += (ks_0 + 2) + # + # X_0 += X_1; X_1 = (X_1 << 13) | (X_1 >> 19); X_1 ^= X_0 # round 9 + # X_0 += X_1; X_1 = (X_1 << 15) | (X_1 >> 17); X_1 ^= X_0 # round 10 + # X_0 += X_1; X_1 = (X_1 << 26) | (X_1 >> 6); X_1 ^= X_0 # round 11 + # X_0 += X_1; X_1 = (X_1 << 6) | (X_1 >> 26); X_1 ^= X_0 # round 12 # inject key X_0 += ks_0; X_1 += (ks_1 + 3) @@ -580,13 +580,10 @@ def __threefry64(X_0, X_1): X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 5 X_0 += X_1; X_1 = (X_1 << 32) | (X_1 >> 32); X_1 ^= X_0 # round 6 + X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 + X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 - # With half of the iterations the "randomness" is already achieved and computation time is halved - - # X_0 += X_1; X_1 = (X_1 << 24) | (X_1 >> 40); X_1 ^= X_0 # round 7 - # X_0 += X_1; X_1 = (X_1 << 21) | (X_1 >> 43); X_1 ^= X_0 # round 8 - # - # # inject key + # inject key # X_0 += ks_2; X_1 += (ks_0 + 2) # # X_0 += X_1; X_1 = (X_1 << 16) | (X_1 >> 48); X_1 ^= X_0 # round 9
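For reference, the rotation distances visible in the two ciphers above (rounds 5 through 12 of the 64-bit variant) match the published Threefry-2x64 schedule R = (16, 42, 12, 31, 16, 32, 24, 21). A table-driven round loop makes the 8-versus-12 round tradeoff an explicit knob instead of commented-out lines; a sketch over Python's unbounded ints, with masking emulating 64-bit wraparound and the key injections between groups of four rounds omitted for brevity:

    MASK64 = (1 << 64) - 1
    R = (16, 42, 12, 31, 16, 32, 24, 21)  # Threefry-2x64 rotation distances

    def mix(x0, x1, r):
        x0 = (x0 + x1) & MASK64
        x1 = ((x1 << r) | (x1 >> (64 - r))) & MASK64
        return x0, x0 ^ x1

    def rounds(x0, x1, n=8):
        for i in range(n):
            x0, x1 = mix(x0, x1, R[i % 8])
        return x0, x1

    print(rounds(0, 1))   # deterministic diffusion of a trivial input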