diff --git a/changelog.md b/changelog.md
index 41e426b50..874da9fe0 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,3 +1,30 @@
+Arraymancer v0.3.0
+==========================
+
+I am very excited to announce the third release of Arraymancer, which includes numerous improvements and breaking changes.
+WARNING: deprecated procs will be removed in a follow-up release within a week, to limit deprecation-warning spam.
+
+Note: due to https://github.com/nim-lang/Nim/issues/6436, deprecation warnings are currently also triggered by the non-deprecated:
+- zeros, ones, newTensor
+
+- **Very** Breaking
+  - Tensors now use reference semantics: `let a = b` will share data by default and copies must be made explicitly.
+  - There is no need to use `unsafe` procs to avoid copies, especially for slices.
+  - The `unsafe` procs are deprecated and will be removed, leading to a smaller and simpler codebase and API/documentation.
+  - Tensors and CudaTensors now work the same way.
+  - Use `clone` to make explicit copies (a short migration sketch appears at the end of this diff).
+  - Arraymancer now works like Numpy and Julia, making it easier to port code.
+  - Unfortunately, it also makes unexpected data sharing harder to debug.
+
+- Deprecated
+  - Version 0.3.1, with all deprecated procs removed, will be released in a week. Due to issue https://github.com/nim-lang/Nim/issues/6436,
+    you will get deprecation warnings even when using non-deprecated procs like `zeros`, `ones` and `newTensor`.
+  - The `newTensor`, `zeros` and `ones` arguments have changed from `zeros([5, 5], int)` to `zeros[int]([5, 5])`.
+  - All `unsafe` procs are now the default behaviour and are deprecated.
+
+
+- Cuda:
+  - Support for convolution forward and backward
 Arraymancer v0.2.0
 Sept. 24, 2017 "The Color of Magic"
diff --git a/src/autograd/ag_accessors.nim b/src/autograd/ag_accessors.nim
index ddc0aaa4e..52c5e7467 100644
--- a/src/autograd/ag_accessors.nim
+++ b/src/autograd/ag_accessors.nim
@@ -21,8 +21,8 @@ template `[]`*[TT](v: Variable[TT], args: varargs[untyped]): Variable[TT] =
   result.tape = v.tape
   result.ancestor = v.ancestor
-  result.value = v.value.unsafeSlice(args)
-  result.grad = v.grad.unsafeSlice(args)
+  result.value = v.value[args]
+  result.grad = v.grad[args]

   result
diff --git a/src/autograd/gates_blas.nim b/src/autograd/gates_blas.nim
index c86241e84..97936630f 100644
--- a/src/autograd/gates_blas.nim
+++ b/src/autograd/gates_blas.nim
@@ -29,8 +29,8 @@ method forward*[TT](self: MatMulGate[TT], a, b: Variable[TT]): Variable[TT] {.in
   result.grad = zeros[getSubType(TT)](result.value.shape)

 method backward*[TT](self: MatMulGate[TT], gradient: TT): SmallDiffs[TT] {.noInit, inline, locks:0.}=
-  result[0] = gradient * self.b.value.unsafeTranspose
-  result[1] = self.a.value.unsafeTranspose * gradient
+  result[0] = gradient * self.b.value.transpose
+  result[1] = self.a.value.transpose * gradient

 proc `*`*[TT](a, b: Variable[TT]): Variable[TT] =
   when compileOption("boundChecks"):
diff --git a/src/autograd/gates_reduce.nim b/src/autograd/gates_reduce.nim
index f28290de4..36d3fcb22 100644
--- a/src/autograd/gates_reduce.nim
+++ b/src/autograd/gates_reduce.nim
@@ -34,7 +34,7 @@ method backward*[TT](self: MeanGate[TT], gradient: TT): SmallDiffs[TT] {.noInit,
   result[0] = gradient / getSubType(TT)(self.a_shape.product) # Conversion to subtype T, oh Higher kinded-types ...
let z_shape = newSeqWith(self.a_shape.len, 1) # We create a shape of 1 dimension that we will expand with broadcast - result[0] = result[0].unsafeReshape(z_shape).unsafeBroadcast(self.a_shape) + result[0] = result[0].reshape(z_shape).broadcast(self.a_shape) proc mean*[TT](a: Variable[TT]): Variable[TT] = when compileOption("boundChecks"): diff --git a/src/nn/activation/relu.nim b/src/nn/activation/relu.nim index c074ae2e3..a8f804070 100644 --- a/src/nn/activation/relu.nim +++ b/src/nn/activation/relu.nim @@ -54,4 +54,4 @@ proc relu*[TT](a: Variable[TT]): Variable[TT] = node.child = result # Caching for backprop - gate.cache = result.value.unsafeView \ No newline at end of file + gate.cache = result.value \ No newline at end of file diff --git a/src/nn/layers/linear.nim b/src/nn/layers/linear.nim index a56dcce0d..ff14aee2a 100644 --- a/src/nn/layers/linear.nim +++ b/src/nn/layers/linear.nim @@ -33,8 +33,8 @@ method forward*[TT](self: LinearGate[TT], a: Variable[TT]): Variable[TT] {.inlin result.grad = zeros_like(result.value) method backward*[TT](self: LinearGate[TT], gradient: TT): SmallDiffs[TT] {.noInit, inline, locks:0.}= - result[0] = self.W.value.unsafeTranspose * gradient # grad w.r.t. x - result[1] = gradient * self.x.value.unsafeTranspose # grad w.r.t. weight + result[0] = self.W.value.transpose * gradient # grad w.r.t. x + result[1] = gradient * self.x.value.transpose # grad w.r.t. weight if not self.b.isNil: result[2] = sum(gradient, axis=0) # grad w.r.t. bias diff --git a/src/nn/loss/sigmoid_cross_entropy.nim b/src/nn/loss/sigmoid_cross_entropy.nim index dc6123d43..5c3a1b5db 100644 --- a/src/nn/loss/sigmoid_cross_entropy.nim +++ b/src/nn/loss/sigmoid_cross_entropy.nim @@ -30,7 +30,7 @@ method forward*[TT](self: SigmoidCrossEntropyLoss[TT], a: Variable[TT], target: result.tape = a.tape # TODO: implement a Scalar[T] concept instead of rewrapping the result into a Tensor - result.value = [sigmoid_cross_entropy(a.value, target)].toTensor.unsafeView + result.value = [sigmoid_cross_entropy(a.value, target)].toTensor result.grad = zeros[getSubType(TT)](1) @@ -44,7 +44,7 @@ proc sigmoid_cross_entropy*[TT](a: Variable[TT], target: TT): Variable[TT] = new gate gate.arity = 1 gate.cache = a - gate.target = target.unsafeView + gate.target = target # Node var node: Node[TT] diff --git a/src/nn_primitives/backend/nnpack_interface.nim b/src/nn_primitives/backend/nnpack_interface.nim index 209d42827..1fd8fdf08 100644 --- a/src/nn_primitives/backend/nnpack_interface.nim +++ b/src/nn_primitives/backend/nnpack_interface.nim @@ -26,8 +26,8 @@ proc nnpack_conv2d*(input, weight, bias: Tensor[float32], padding, stride: Size2 output_width = (2*padding.width + input.nchw_width) - (weight.nchw_width - 1) # Make sure the data is contiguous before passing to nnpack - let input = input.unsafeContiguous() - let weight = weight.unsafeContiguous() + let input = input.asContiguous() + let weight = weight.asContiguous() var bias_nonnil: Tensor[float32] # TODO make bias truly optional and not just a tensor of rank 0 @@ -36,7 +36,7 @@ proc nnpack_conv2d*(input, weight, bias: Tensor[float32], padding, stride: Size2 # Temporary bias filled with zeros just to pass to nnpack bias_nonnil = zeros[float32](output_channels) else: - bias_nonnil = bias.unsafeContiguous() + bias_nonnil = bias.asContiguous() # Prepare tensor that the result will be stored on result = newTensorUninit[float32](input.shape[0], output_channels, output_height, output_width) diff --git a/src/nn_primitives/fallback/conv.nim 
b/src/nn_primitives/fallback/conv.nim index ec657b015..8a558f5d3 100644 --- a/src/nn_primitives/fallback/conv.nim +++ b/src/nn_primitives/fallback/conv.nim @@ -91,19 +91,20 @@ proc im2colgemm_conv2d*[T](input, kernel, bias: Tensor[T], output_height = (input.nchw_height + (2*padding.height) - kernel.nchw_height) div stride.height + 1 output_width = (input.nchw_width + (2*padding.width) - kernel.nchw_width) div stride.width + 1 channels_col = input.nchw_channels * kernel.nchw_height * kernel.nchw_width - kernel_col = kernel.unsafeReshape(output_channels, channels_col) + kernel_col = kernel.reshape(output_channels, channels_col) result = newTensorUninit[T](batch_size, output_channels, output_height, output_width) var input_col = newTensorUninit[T](channels_col, output_height * output_width) var output: Tensor[T] - for i in 0.. 0: - result .+= bias.unsafeUnsqueeze(0) + result .+= bias.unsqueeze(0) proc im2colgemm_conv2d_gradient*[T](input, kernel: Tensor[T], padding: Size2D = (0,0), @@ -119,7 +120,7 @@ proc im2colgemm_conv2d_gradient*[T](input, kernel: Tensor[T], output_width = (input.nchw_width + (2*padding.width) - kernel.nchw_width) div stride.width + 1 output_flatten_size = output_height*output_width channels_col = input.nchw_channels * kernel_size.height * kernel_size.width - kernel_col = kernel.unsafeReshape(output_channels, input.nchw_channels*kernel.nchw_height*kernel.nchw_width) + kernel_col = kernel.reshape(output_channels, input.nchw_channels*kernel.nchw_height*kernel.nchw_width) # Check if grad output shape looks correct assert grad_output.nchw_width == output_width and grad_output.nchw_height == output_height @@ -132,9 +133,9 @@ proc im2colgemm_conv2d_gradient*[T](input, kernel: Tensor[T], for i in 0.. 0: - let gradBiasTensorDesc = newCudnn4DTensorDesc grad_bias.unsafeUnsqueeze(0) + let gradBiasTensorDesc = newCudnn4DTensorDesc grad_bias.unsqueeze(0) check cudnnConvolutionBackwardBias( defaultHandle_cudnn, addr alpha, gradOutputTensorDesc, - gOutput.data.data[], + gOutput.get_offset_ptr, addr beta, gradBiasTensorDesc, - grad_bias.data.data[] + grad_bias.get_offset_ptr ) # TODO squeeze and divide by batch size? 
@@ -143,16 +143,16 @@ proc conv2d_backward*[T: float32](input, kernel, bias: CudaTensor[T], defaultHandle_cudnn, addr alpha, srcTensorDesc, - input.data.data[], + input.get_offset_ptr, gradOutputTensorDesc, - gOutput.data.data[], + gOutput.get_offset_ptr, convDesc, kernel_algo_workspace.algo, kernel_algo_workspace.workspace[], kernel_algo_workspace.sizeInBytes, addr beta, gradKernelDesc, - grad_kernel.data.data[] + grad_kernel.get_offset_ptr ) when defined(debug): @@ -176,14 +176,14 @@ proc conv2d_backward*[T: float32](input, kernel, bias: CudaTensor[T], defaultHandle_cudnn, addr alpha, kernelDesc, - kernel.data.data[], + kernel.get_offset_ptr, gradOutputTensorDesc, - gOutput.data.data[], + gOutput.get_offset_ptr, convDesc, gradInput_algo_workspace.algo, gradInput_algo_workspace.workspace[], gradInput_algo_workspace.sizeInBytes, addr beta, gradInputTensorDesc, - grad_input.data.data[] + grad_input.get_offset_ptr ) diff --git a/src/nn_primitives/nnp_convolution.nim b/src/nn_primitives/nnp_convolution.nim index b7bb4a29e..f092d359d 100644 --- a/src/nn_primitives/nnp_convolution.nim +++ b/src/nn_primitives/nnp_convolution.nim @@ -91,7 +91,7 @@ proc conv2d_backward*[T](input, weight, bias: Tensor[T], # Bias gradient if bias.rank > 0: # TODO make bias truly optional and not just a tensor of rank 0 # TODO: sum over many axes - grad_bias = grad_output.sum(3).sum(2).sum(0).unsafeReshape(bias.shape) + grad_bias = grad_output.sum(3).sum(2).sum(0).reshape(bias.shape) case algorithm: of NNPackAuto: diff --git a/src/nn_primitives/nnp_linear.nim b/src/nn_primitives/nnp_linear.nim index 614fd00fa..f3a94faaf 100644 --- a/src/nn_primitives/nnp_linear.nim +++ b/src/nn_primitives/nnp_linear.nim @@ -34,8 +34,8 @@ proc linear_backward*[T]( cached_tensor, weight, bias: Tensor[T], dW, db: var Tensor[T]): Tensor[T] {.inline.} = - result = weight.unsafeTranspose * gradient - gemm(gradient, cached_tensor.unsafeTranspose, dW) + result = weight.transpose * gradient + gemm(gradient, cached_tensor.transpose, dW) db = sum(gradient, axis=0) # https://mlxai.github.io/2017/01/10/a-modular-approach-to-implementing-fully-connected-neural-networks.html @@ -44,6 +44,6 @@ proc linear_backward*[T]( cached_tensor, weight: Tensor[T], dW: var Tensor[T]): Tensor[T] {.inline.} = - result = weight.unsafeTranspose * gradient - gemm(gradient, cached_tensor.unsafeTranspose, dW) + result = weight.transpose * gradient + gemm(gradient, cached_tensor.transpose, dW) diff --git a/src/nn_primitives/nnp_softmax_cross_entropy.nim b/src/nn_primitives/nnp_softmax_cross_entropy.nim index 39e7ecd11..c1ec458d3 100644 --- a/src/nn_primitives/nnp_softmax_cross_entropy.nim +++ b/src/nn_primitives/nnp_softmax_cross_entropy.nim @@ -97,12 +97,12 @@ proc sparse_softmax_cross_entropy*[T](input: Tensor[T], target: Tensor[int]): T # ∑i(- ti * yi) is either -yi or 0 in the sparse case. 
# Since target holds coordinates: ∑i(- ti * yi) = - yi[ti] for i in 0||(input.shape[1]-1): - let lse = input.unsafeSlice(_,i).logsumexp + let lse = input[_,i].logsumexp when not declared(openmp): - result += lse - input.unsafeSlice(target.unsafeSlice(i), i) + result += lse - input[target[i], i] else: - let tmp = lse - input.unsafeSlice(target.unsafeSlice(i), i) + let tmp = lse - input[target[i], i] {.emit:"#pragma omp atomic".} {.emit:"`result` += `tmp`;".} @@ -140,7 +140,7 @@ proc softmax_cross_entropy_backward*[T]( elif gradient is Tensor: let grad = gradient.data[gradient.offset] - let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).unsafeBroadcast(cached_tensor.shape) + let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).broadcast(cached_tensor.shape) result = map3_inline(cached_tensor, target, axis_max_sumexp): grad * (stable_softmax(x, z.max, z.sumexp) - y) / T(batch_size) @@ -176,8 +176,8 @@ proc sparse_softmax_cross_entropy_backward*[T]( for i, truth_idx in enumerate(target): result[truth_idx, i] = -1 - let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).unsafeBroadcast(cached_tensor.shape) - # let axis_max_sumexp = cached_tensor.classic_max_sumexp(axis = 1).unsafeBroadcast(cached_tensor.shape) + let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).broadcast(cached_tensor.shape) + # let axis_max_sumexp = cached_tensor.classic_max_sumexp(axis = 1).broadcast(cached_tensor.shape) apply3_inline(result, cached_tensor, axis_max_sumexp): diff --git a/src/nn_primitives/private/p_logsumexp.nim b/src/nn_primitives/private/p_logsumexp.nim index c9c329e5d..4a0f8f85e 100644 --- a/src/nn_primitives/private/p_logsumexp.nim +++ b/src/nn_primitives/private/p_logsumexp.nim @@ -45,13 +45,13 @@ proc streaming_max_sumexp*[T](t: Tensor[T], axis: int): Tensor[tuple[max:T, sume result = newTensorUninit[tuple[max:T, sumexp: T]](t.shape[axis]) for i in `||`(0, t.shape[axis]-1, "simd"): - result.data[i] = t.unsafeAtAxisIndex(axis, i).streaming_max_sumexp + result.data[i] = t.atAxisIndex(axis, i).streaming_max_sumexp # Reexpand the tensor to be consistent with fold_axis/reduce_axis if axis == 0: - result = result.unsafeUnsqueeze(1) + result = result.unsqueeze(1) else: - result = result.unsafeUnsqueeze(0) + result = result.unsqueeze(0) diff --git a/src/tensor/accessors.nim b/src/tensor/accessors.nim index 35910fa37..ef820e504 100644 --- a/src/tensor/accessors.nim +++ b/src/tensor/accessors.nim @@ -26,22 +26,16 @@ proc atContiguousIndex*[T](t: var Tensor[T], idx: int): var T {.noSideEffect,inl ## i.e. 
as treat the tensor as flattened return t.data[t.getContiguousIndex(idx)] -proc unsafeAtAxisIndex*[T](t: Tensor[T], axis, idx: int): Tensor[T] {.noInit,inline.} = - ## Returns a sliced tensor in the given axis index (unsafe) +proc atAxisIndex*[T](t: Tensor[T], axis, idx: int): Tensor[T] {.noInit,inline.} = + ## Returns a sliced tensor in the given axis index + when compileOption("boundChecks"): check_axis_index(t, axis, idx) - result = t.unsafeView() + result = t result.shape[axis] = 1 result.offset += result.strides[axis]*idx -proc atAxisIndex*[T](t: Tensor[T], axis, idx: int): Tensor[T] {.noInit,inline.} = - ## Returns a sliced tensor in the given axis index - - # As contiguous is called to force a copy of the slice - # otherwise the result would copy the whole parent tensor data - t.unsafeAtAxisIndex(axis, idx).clone() - iterator items*[T](t: Tensor[T]): T {.inline,noSideEffect.} = ## Inline iterator on Tensor values ## @@ -285,8 +279,7 @@ template axisIterator[T](t: Tensor[T], axis, iter_offset, iter_size: int): untyp ## - A slice along the given axis at each iteration. ## ## Note: The slice dimension is not collapsed by default. - ## You can use ``unsafeSqueeze`` to collapse it without copy. - ## In this case ``unsafeSqueeze`` is safe. + ## You can use ``squeeze`` to collapse it. ## ## Usage: ## .. code:: nim @@ -296,7 +289,7 @@ template axisIterator[T](t: Tensor[T], axis, iter_offset, iter_size: int): untyp check_axis_index(t, axis, iter_offset) check_axis_index(t, axis, iter_offset+iter_size-1) - var out_t = t.unsafeAtAxisIndex(axis, iter_offset) + var out_t = t.atAxisIndex(axis, iter_offset) for _ in 0.. 1: withMemoryOptimHints() - var results{.align64.}: array[OMP_MAX_REDUCE_BLOCKS * maxItemsPerCacheLine, type(reduced)] + var results{.align64, noInit.}: array[OMP_MAX_REDUCE_BLOCKS * maxItemsPerCacheLine, type(reduced)] let bsize = size div num_blocks if bsize > 1: diff --git a/src/tensor/comparison.nim b/src/tensor/comparison.nim index 47d04bca5..417ea2423 100644 --- a/src/tensor/comparison.nim +++ b/src/tensor/comparison.nim @@ -39,7 +39,7 @@ proc `.==`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x == y) proc `.!=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -49,7 +49,7 @@ proc `.!=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x != y) proc `.<=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -59,7 +59,7 @@ proc `.<=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x <= y) proc `.<`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -69,7 +69,7 @@ proc `.<`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x < y) proc `.>=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -79,7 +79,7 @@ proc `.>=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x >= y) proc `.>`*[T](a, b: 
Tensor[T]): Tensor[bool] {.noInit.} = @@ -89,5 +89,5 @@ proc `.>`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x > y) \ No newline at end of file diff --git a/src/tensor/data_structure.nim b/src/tensor/data_structure.nim index 9ec329f73..599b9e54c 100644 --- a/src/tensor/data_structure.nim +++ b/src/tensor/data_structure.nim @@ -26,43 +26,87 @@ type Cpu, Cuda + CpuStorage* {.shallow.} [T] = object + ## Opaque data storage for Tensors + ## Currently implemented as a seq with reference semantics (shallow copy on assignment). + ## It may change in the future for a custom memory managed and 64 bit aligned solution. + ## + ## Warning ⚠: + ## Do not use Fdata directly, direct access will be removed in 0.4.0. + + # `Fdata` will be transformed into an opaque type once `unsafeToTensorReshape` is removed. + Fdata*: seq[T] + Tensor*[T] = object ## Tensor data structure stored on Cpu ## - ``shape``: Dimensions of the tensor ## - ``strides``: Numbers of items to skip to get the next item along a dimension. - ## - ``offset``: Offset to get the first item of the Tensor. Note: offset can be negative, in particular for slices. - ## - ``data``: A sequence that holds the actual data + ## - ``offset``: Offset to get the first item of the tensor. Note: offset can be negative, in particular for slices. + ## - ``storage``: An opaque data storage for the tensor ## Fields are public so that external libraries can easily construct a Tensor. + ## You can use ``.data`` to access the opaque data storage. + ## + ## Warning ⚠: + ## Assignment ```var a = b``` does not copy the data. Data modification on one tensor will be reflected on the other. + ## However modification on metadata (shape, strides or offset) will not affect the other tensor. + ## Explicit copies can be made with ``clone``: ```var a = b.clone``` shape*: MetadataArray strides*: MetadataArray offset*: int - data*: seq[T] # Perf note: seq are always deep copied on "var" assignement. + storage*: CpuStorage[T] - CudaSeq* [T: SomeReal] = object - ## Seq-like structure on the Cuda backend. +type + CudaStorage*[T: SomeReal] = object + ## Opaque seq-like structure for storage on the Cuda backend. + ## + ## Nim garbage collector will automatically ask cuda to clear GPU memory if data becomes unused. ## - ## Nim garbage collector will automatically ask cuda to clear GPU memory if ``data`` becomes unused. - len*: int - data*: ref[ptr UncheckedArray[T]] + # TODO: Forward declaring this and making this completely private prevent assignment in newCudaStorage from working + Flen*: int + Fdata*: ptr UncheckedArray[T] + Fref_tracking*: ref[ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection. CudaTensor*[T: SomeReal] = object ## Tensor data structure stored on Nvidia GPU (Cuda) - ## - ``shape``: Dimensions of the tensor + ## - ``shape``: Dimensions of the CudaTensor ## - ``strides``: Numbers of items to skip to get the next item along a dimension. - ## - ``offset``: Offset to get the first item of the Tensor. Note: offset can be negative, in particular for slices. - ## - ``data``: A cuda seq-like object that points to the data location - ## Note: currently ``=`` assignement for CudaTensor does not copy. Both CudaTensors will share a view of the same data location. - ## Modifying the data in one will modify the data in the other. 
+ ## - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices. + ## - ``storage``: An opaque data storage for the CudaTensor ## - ## In the future CudaTensor will leverage Nim compiler to automatically - ## copy if a memory location would be used more than once in a mutable manner. + ## Warning ⚠: + ## Assignment ```var a = b``` does not copy the data. Data modification on one CudaTensor will be reflected on the other. + ## However modification on metadata (shape, strides or offset) will not affect the other tensor. + ## Explicit copies can be made with ``clone``: ```var a = b.clone``` shape*: MetadataArray strides*: MetadataArray offset*: int - data*: CudaSeq[T] # Memory on Cuda device will be automatically garbage-collected + storage*: CudaStorage[T] AnyTensor*[T] = Tensor[T] or CudaTensor[T] +# ############### +# Field accessors +# ############### + +proc data*[T](t: Tensor[T]): seq[T] {.inline, noSideEffect, noInit.} = + # Get tensor raw data + # This is intended for library writer + shallowCopy(result, t.storage.Fdata) + +proc data*[T](t: var Tensor[T]): var seq[T] {.inline, noSideEffect, noInit.} = + # Get mutable tensor raw data + # This is intended for library writer + shallowCopy(result, t.storage.Fdata) + +proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}= + # Set tensor raw data + # This is intended for library writer + t.storage.Fdata = s + +# ################ +# Tensor Metadata +# ################ + template rank*(t: AnyTensor): int = ## Input: ## - A tensor @@ -105,7 +149,7 @@ proc shape_to_strides*(shape: MetadataArray, layout: OrderType = rowMajor, resul accum *= shape[i] return -proc is_C_contiguous*(t: AnyTensor): bool {.noSideEffect,inline.}= +proc is_C_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}= ## Check if the tensor follows C convention / is row major var z = 1 for i in countdown(t.shape.high,0): @@ -116,7 +160,7 @@ proc is_C_contiguous*(t: AnyTensor): bool {.noSideEffect,inline.}= z *= t.shape[i] return true -proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect,inline.}= +proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}= ## Check if the tensor follows Fortran convention / is column major var z = 1 for i in 0.. U): Tensor[U] {.deprecated, inline.}= diff --git a/src/tensor/deprecated/init_cpu_deprecated_0_2_0.nim b/src/tensor/deprecated/init_cpu_deprecated_0_3_0.nim similarity index 58% rename from src/tensor/deprecated/init_cpu_deprecated_0_2_0.nim rename to src/tensor/deprecated/init_cpu_deprecated_0_3_0.nim index 9c92f7f21..3d9eb8ffe 100644 --- a/src/tensor/deprecated/init_cpu_deprecated_0_2_0.nim +++ b/src/tensor/deprecated/init_cpu_deprecated_0_3_0.nim @@ -13,6 +13,14 @@ # limitations under the License. +# Deprecated on 2017-09-07 by https://github.com/mratsim/Arraymancer/commit/ea7508c0724a7df7559b68cf8c8470d9ee0d1588 +# First release with deprecated tag: 0.2.0 + +import ../private/p_init_cpu, + ../data_structure, + ../init_cpu, + sequtils + proc newTensor*(shape: openarray[int], T: typedesc): Tensor[T] {.noSideEffect, inline, deprecated.} = ## Creates a new Tensor on Cpu backend ## Input: @@ -44,4 +52,29 @@ proc ones*[T: SomeNumber](shape: openarray[int], typ: typedesc[T]): Tensor[T] {. 
## Result: ## - A one-ed Tensor of the same shape tensorCpu(shape, result) - result.data = newSeqWith(result.size, 1.T) \ No newline at end of file + result.data = newSeqWith(result.size, 1.T) + +proc unsafeView*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit,inline, deprecated.}= + ## DEPRECATED: With the switch to reference semantics, ``unsafe`` is now the default. + ## + ## Input: + ## - A tensor + ## Returns: + ## - A shallow copy. Both tensors share the same memory location. + ## + ## Warning ⚠ + ## Both tensors shares the same memory. Data modification on one will be reflected on the other. + ## However modifying the shape, strides or offset will not affect the other. + result = t + +proc unsafeToTensor*[T: SomeNumber](data: seq[T]): Tensor[T] {.noInit,noSideEffect, deprecated.} = + ## DEPRECATED + ## + ## Convert a seq to a Tensor, sharing the seq data + ## Input: + ## - A seq with the tensor data + ## Result: + ## - A rank 1 tensor with the same size of the input + ## WARNING: result share storage with input + tensorCpu([data.len], result) + shallowCopy(result.data, data) \ No newline at end of file diff --git a/src/tensor/deprecated/init_deprecated_0_1_0.nim b/src/tensor/deprecated/init_deprecated_0_2_0.nim similarity index 95% rename from src/tensor/deprecated/init_deprecated_0_1_0.nim rename to src/tensor/deprecated/init_deprecated_0_2_0.nim index cb578c7ce..3287b4787 100644 --- a/src/tensor/deprecated/init_deprecated_0_1_0.nim +++ b/src/tensor/deprecated/init_deprecated_0_2_0.nim @@ -16,6 +16,13 @@ # init procs will not offer the backend parameter anymore. # Full rationale in the Design_Document on Github. +# Deprecated on 2017-09-07 by https://github.com/mratsim/Arraymancer/commit/58f2aff4fbd670d03d5b9c64cbd0e5467d24037b#diff-687a8a0e558961410c916e5cfe045d97 +# First release with deprecated tag: 0.2.0 + +import ../private/p_init_cpu, + ../data_structure, + ../init_cpu + proc newTensor*(shape: openarray[int], T: typedesc, backend: static[Backend]): auto {.noSideEffect, deprecated.} = ## DEPRECATED - The backend: static[Backend] argument has been deprecated for easier maintenance. ## diff --git a/src/tensor/deprecated/optim_ops_fusion_deprecated_0_3_0.nim b/src/tensor/deprecated/optim_ops_fusion_deprecated_0_3_0.nim new file mode 100644 index 000000000..ad642e45d --- /dev/null +++ b/src/tensor/deprecated/optim_ops_fusion_deprecated_0_3_0.nim @@ -0,0 +1,40 @@ +# Copyright 2017 the Arraymancer contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ../private/p_checks, + ../private/p_init_cpu, + ../data_structure + + +proc unsafeToTensorReshape*[T](data: seq[T], shape: varargs[int]): Tensor[T] {.noSideEffect, deprecated.} = + ## Deprecated + ## + ## Fuse unsafeToTensor and unsafeReshape in one operation + ## + ## With move semantics + reference semantics this is not needed. + # Note: once this is removed, CpuStorage can be changed to not expose Fdata. 
+ + when compileOption("boundChecks"): + check_nested_elements(shape.toMetadataArray, data.len) + + tensorCpu(shape, result) + shallowCopy(result.storage.Fdata, data) + +template rewriteUnsafeToTensorReshape*{unsafeReshape(unsafeToTensor(s), shape)}( + s: seq, + shape: varargs[int]): auto = + ## Fuse ``sequence.unsafeToTensor().unsafeReshape(new_shape)`` into a single operation. + ## + ## Operation fusion leverage the Nim compiler and should not be called explicitly. + unsafeToTensorReshape(s, shape, dummy_bugfix) diff --git a/src/tensor/deprecated/shapeshifting_deprecated_0_3_0.nim b/src/tensor/deprecated/shapeshifting_deprecated_0_3_0.nim new file mode 100644 index 000000000..8cee99554 --- /dev/null +++ b/src/tensor/deprecated/shapeshifting_deprecated_0_3_0.nim @@ -0,0 +1,208 @@ +# Copyright 2017 the Arraymancer contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ../backend/metadataArray, + ../private/p_shapeshifting, + ../private/p_checks, + ../private/p_accessors_macros_write, + ../data_structure, ../init_cpu, ../higher_order + + +proc unsafeTranspose*(t: Tensor): Tensor {.noInit,noSideEffect,inline, deprecated.} = + ## DEPRECATED + ## + ## Transpose a Tensor without copy. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## + ## For N-d Tensor with shape (0, 1, 2 ... n-1) the resulting tensor will have shape (n-1, ... 2, 1, 0) + t.shape.reversed(result.shape) + t.strides.reversed(result.strides) + result.offset = t.offset + shallowCopy(result.data, t.data) + +proc unsafeContiguous*[T](t: Tensor[T], layout: OrderType = rowMajor, force: bool = false): Tensor[T] {.noInit, deprecated.} = + ## DEPRECATED + ## + ## Transform a tensor with general striding to a Tensor with contiguous layout. + ## + ## If the tensor is already contiguous it is returned without copy, underlying data is shared between the input and the output. + ## + ## Warning ⚠: + ## This may be a no-copy operation with result data shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## + ## By default tensor will be rowMajor. + ## + ## By default nothing is done if the tensor is already contiguous (C Major or F major) + ## The "force" parameter can force re-ordering to a specific layout + + let cCont = t.is_C_contiguous + let fCont = t.is_F_contiguous + + if (cCont or fCont) and not force: + return t + elif cCont and layout == rowMajor: + return t + elif fCont and layout == colMajor: + return t + contiguousT(result, t, layout) + +proc unsafeReshape*(t: Tensor, new_shape: varargs[int]): Tensor {.noInit, deprecated.} = + ## DEPRECATED + ## + ## Reshape a tensor without copy. + ## + ## ⚠ Reshaping without copy is only possible on contiguous Tensors + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. 
+ + t.reshape_no_copy(new_shape, result) + result.storage = t.storage + +proc unsafeReshape*(t: Tensor, new_shape: MetadataArray): Tensor {.noInit, deprecated.} = + ## DEPRECATED + ## + ## Reshape a tensor without copy. + ## + ## ⚠ Reshaping without copy is only possible on contiguous Tensors + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + + t.reshape_no_copy(new_shape, result) + result.storage = t.storage + + +proc unsafeBroadcast*[T](t: Tensor[T], shape: varargs[int]): Tensor[T] {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Explicitly broadcast a Tensor to the specified shape. + ## The returned broadcasted Tensor share the underlying data with the input. + ## + ## Dimension(s) of size 1 can be expanded to arbitrary size by replicating + ## values along that dimension. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + result = t + result.broadcastT(shape) + +proc unsafeBroadcast*[T](t: Tensor[T], shape: MetadataArray): Tensor[T] {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Explicitly broadcast a Tensor to the specified shape. + ## The returned broadcasted Tensor share the underlying data with the input. + ## + ## Dimension(s) of size 1 can be expanded to arbitrary size by replicating + ## values along that dimension. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + result = t + result.broadcastT(shape) + +proc unsafeBroadcast2*[T](a, b: Tensor[T]): tuple[a, b: Tensor[T]] {.noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Broadcast 2 tensors so they have compatible shapes for element-wise computations. + ## + ## Tensors in the tuple can be accessed with output.a and output.b + ## + ## The returned broadcasted Tensors share the underlying data with the input. + ## + ## Dimension(s) of size 1 can be expanded to arbitrary size by replicating + ## values along that dimension. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + + broadcast2T(a,b, result) + + shallowCopy(result.a.data, a.data) + shallowCopy(result.b.data, b.data) + + +proc unsafePermute*(t: Tensor, dims: varargs[int]): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Permute dimensions of a tensors + ## Input: + ## - a tensor + ## - the new dimension order + ## Returns: + ## - a tensor with re-order dimension + ## Usage: + ## .. code:: nim + ## a.permute(0,2,1) # dim 0 stays at 0, dim 1 becomes dim 2 and dim 2 becomes dim 1 + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + + # TODO: bounds check + result = t + permuteT(result, dims) + +proc unsafeSqueeze*(t: Tensor): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Squeeze tensors. 
For example a Tensor of shape [4,1,3] will become [4,3] + ## Input: + ## - a tensor + ## Returns: + ## - a tensor with singleton dimensions collapsed that share the same underlying storage + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + result = t + result.squeezeT + +proc unsafeSqueeze*(t: Tensor, axis: int): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Collapse the given axis, if the dimension is not 1; it does nothing + ## Input: + ## - a tensor + ## - an axis (dimension) + ## Returns: + ## - a tensor with singleton dimensions collapsed + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + result = t + result.squeezeT(axis) + +proc unsafeUnsqueeze*(t: Tensor, axis: int): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Insert a new axis just before the given axis, increasing the tensor + ## dimension (rank) by 1 + ## - a tensor with that new axis + ## WARNING: result share storage with input + ## This does not guarantee `let` variable immutability + result = t + result.unsqueezeT(axis) \ No newline at end of file diff --git a/src/tensor/deprecated/syntactic_sugar_deprecated_0_3_0.nim b/src/tensor/deprecated/syntactic_sugar_deprecated_0_3_0.nim new file mode 100644 index 000000000..affe00236 --- /dev/null +++ b/src/tensor/deprecated/syntactic_sugar_deprecated_0_3_0.nim @@ -0,0 +1,35 @@ +# Copyright 2017 the Arraymancer contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +template unsafeAt*[T](t: Tensor[T], args: varargs[untyped]): untyped {.deprecated.}= + ## DEPRECATED: use `at` instead. + ## + ## Slice a Tensor and collapse singleton dimension. + ## + ## Data is shared between input and output. + ## Input: + ## - a Tensor + ## - and: + ## - specific coordinates (``varargs[int]``) + ## - or a slice (cf. tutorial) + ## Returns: + ## - a value or a view of the Tensor corresponding to the slice + ## Singleton dimension are collapsed + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## Usage: + ## See the ``[]`` macro + t.unsafeSlice(args).unsafeSqueeze \ No newline at end of file diff --git a/src/tensor/exporting.nim b/src/tensor/exporting.nim index 26eaf3b0c..06a77922b 100644 --- a/src/tensor/exporting.nim +++ b/src/tensor/exporting.nim @@ -33,7 +33,7 @@ proc export_tensor*[T](t: Tensor[T]): ## If the tensor was not contiguous (a slice for example), it is reshaped. 
## Data is exported in C order (last index changes the fastest, column in 2D case) - let contig_t = t.unsafeContiguous + let contig_t = t.asContiguous result.shape = contig_t.shape result.strides = contig_t.strides diff --git a/src/tensor/fallback/naive_l2_gemv.nim b/src/tensor/fallback/naive_l2_gemv.nim index 247d65db6..75df8366b 100644 --- a/src/tensor/fallback/naive_l2_gemv.nim +++ b/src/tensor/fallback/naive_l2_gemv.nim @@ -47,12 +47,12 @@ proc naive_gemv_fallback*[T: SomeInteger]( # Naive implementation: split the matrices along vertical axis - let cont_A = A.unsafeContiguous(rowMajor, force=true) + let cont_A = A.asContiguous(rowMajor, force=true) # if A is C_contiguous (row-major) slices along the row are also contiguous - # so we can use unsafeReshape and avoid allocation inside the for loop + # so we can use reshape and avoid allocation inside the for loop let colA = cont_A.shape[1] var i: int = 0 for ai in cont_A.axis(0): - y[i] += alpha * dot(ai.unsafeReshape(colA), x) + y[i] += alpha * dot(ai.reshape(colA), x) i += 1 \ No newline at end of file diff --git a/src/tensor/higher_order.nim b/src/tensor/higher_order.nim index 5ae6611a7..44cbd7713 100644 --- a/src/tensor/higher_order.nim +++ b/src/tensor/higher_order.nim @@ -48,7 +48,7 @@ template map_inline*[T](t: Tensor[T], op:untyped): untyped = omp_parallel_blocks(block_offset, block_size, dest.size): for i, x {.inject.} in enumerate(t, block_offset, block_size): data[i] = op - dest.unsafeView() + dest template map2_inline*[T, U](t1: Tensor[T], t2: Tensor[U], op:untyped): untyped = when compileOption("boundChecks"): @@ -61,7 +61,7 @@ template map2_inline*[T, U](t1: Tensor[T], t2: Tensor[U], op:untyped): untyped = omp_parallel_blocks(block_offset, block_size, t1.size): for i, x {.inject.}, y {.inject.} in enumerateZip(t1, t2, block_offset, block_size): data[i] = op - dest.unsafeView() + dest template map3_inline*[T, U, V](t1: Tensor[T], t2: Tensor[U], t3: Tensor[V], op:untyped): untyped = when compileOption("boundChecks"): @@ -76,7 +76,7 @@ template map3_inline*[T, U, V](t1: Tensor[T], t2: Tensor[U], t3: Tensor[V], op:u for i, x {.inject.}, y {.inject.}, z {.inject.} in enumerateZip(t1, t2, t3, block_offset, block_size): data[i] = op - dest.unsafeView() + dest template reduce_inline*[T](t: Tensor[T], op: untyped): untyped = var reduced : T @@ -101,17 +101,17 @@ template reduce_axis_inline*[T](t: Tensor[T], reduction_axis: int, op: untyped): var reduced : type(t) let weight = t.size div t.shape[reduction_axis] omp_parallel_reduce_blocks(reduced, block_offset, block_size, t.shape[reduction_axis], weight, op) do: - x = t.atAxisIndex(reduction_axis, block_offset).unsafeView() + x = t.atAxisIndex(reduction_axis, block_offset).clone() do: for y {.inject.} in t.axis(reduction_axis, block_offset, block_size): op - reduced.unsafeView() + reduced template fold_axis_inline*[T](t: Tensor[T], result_type: typedesc, fold_axis: int, op_initial, op_middle, op_final: untyped): untyped = var reduced : result_type let weight = t.size div t.shape[fold_axis] omp_parallel_reduce_blocks(reduced, block_offset, block_size, t.shape[fold_axis], weight, op_final) do: - let y {.inject.} = t.atAxisIndex(fold_axis, block_offset).unsafeView() + let y {.inject.} = t.atAxisIndex(fold_axis, block_offset) op_initial do: for y {.inject.} in t.axis(fold_axis, block_offset, block_size): @@ -119,7 +119,7 @@ template fold_axis_inline*[T](t: Tensor[T], result_type: typedesc, fold_axis: in # If the result is a Tensor, return without copy when reduced is AnyTensor: - 
reduced.unsafeView() + reduced else: reduced diff --git a/src/tensor/init_cpu.nim b/src/tensor/init_cpu.nim index 276783bff..1b63faa50 100644 --- a/src/tensor/init_cpu.nim +++ b/src/tensor/init_cpu.nim @@ -22,20 +22,6 @@ import ../private/[functional, nested_containers, sequninit], random, math -proc unsafeView*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit,inline.}= - ## Input: - ## - A tensor - ## Returns: - ## - A shallow copy. Both tensors share the same memory location. - ## - ## Warning ⚠ - ## Both tensors shares the same memory. Data modification on one will be reflected on the other. - ## However modifying the shape, strides or offset will not affect the other. - result.shape = t.shape - result.strides = t.strides - result.offset = t.offset - shallowCopy(result.data, t.data) - proc newTensorUninit*[T](shape: varargs[int]): Tensor[T] {.noSideEffect,noInit, inline.} = ## Creates a new Tensor on Cpu backend ## Input: @@ -95,16 +81,6 @@ proc toTensor*(s:openarray, dummy_bugfix: static[int] = 0 ): auto {.noSideEffect # TODO: remove 'dummy_bugfix' - https://github.com/nim-lang/Nim/issues/6343 toTensorCpu(s) -proc unsafeToTensor*[T: SomeNumber](data: seq[T]): Tensor[T] {.noInit,noSideEffect.} = - ## Convert a seq to a Tensor, sharing the seq data - ## Input: - ## - A seq with the tensor data - ## Result: - ## - A rank 1 tensor with the same size of the input - ## WARNING: result share storage with input - tensorCpu([data.len], result) - shallowCopy(result.data, data) - proc toTensor*(s:string): auto {.noSideEffect.} = ## Convert a string to a Tensor ## diff --git a/src/tensor/init_cpu_copy.nim b/src/tensor/init_cpu_copy.nim index 205187987..9d0150a0c 100644 --- a/src/tensor/init_cpu_copy.nim +++ b/src/tensor/init_cpu_copy.nim @@ -18,7 +18,7 @@ import ./data_structure, # Unfortunately higher_order depends on init_cpu and "clone" depends on higher_order, so we need an extra file # to deal with circular dependencies -proc clone*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit,inline.}= +proc clone*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit.}= ## Input: ## - A tensor ## Returns: diff --git a/src/tensor/init_cuda.nim b/src/tensor/init_cuda.nim index bf9ea5a4a..23e5e85a0 100644 --- a/src/tensor/init_cuda.nim +++ b/src/tensor/init_cuda.nim @@ -21,26 +21,12 @@ import ../private/sequninit, ./init_cpu, nimcuda/[cuda_runtime_api, driver_types] -proc unsafeView*[T](t: CudaTensor[T]): CudaTensor[T] {.inline,noSideEffect.}= - ## Input: - ## - A CudaTensor - ## Returns: - ## - A shallow copy. - ## - ## Warning ⚠ - ## Both tensors shares the same memory. Data modification on one will be reflected on the other. - ## However modifying the shape, strides or offset will not affect the other. - - # shape and strides fields have value semantics by default - # CudaSeq has ref semantics - system.`=`(result, t) - proc clone*[T](t: CudaTensor[T]): CudaTensor[T] {.noInit.}= ## Clone (deep copy) a CudaTensor. ## Copy will not share its data with the original. ## ## Tensor is copied as is. For example it will not be made contiguous. 
- ## Use `unsafeContiguous` for this case + ## Use `asContiguous` for this case # Note: due to modifying the defaultStream global var for async memcopy # proc cannot be tagged noSideEffect @@ -48,8 +34,8 @@ proc clone*[T](t: CudaTensor[T]): CudaTensor[T] {.noInit.}= result.shape = t.shape result.strides = t.strides result.offset = t.offset - result.data = newCudaSeq[T](t.data.len) - let size = t.data.len * sizeof(T) + result.storage = newCudaStorage[T](t.storage.Flen) + let size = t.storage.Flen * sizeof(T) check cudaMemCpyAsync(result.get_data_ptr, t.get_data_ptr, @@ -57,34 +43,6 @@ proc clone*[T](t: CudaTensor[T]): CudaTensor[T] {.noInit.}= cudaMemcpyDeviceToDevice, defaultStream) # defaultStream is a cudaStream_t global var -# ########################################################### -# Implement value semantics for CudaTensor -# Pending https://github.com/nim-lang/Nim/issues/6348 -# Tracked in https://github.com/mratsim/Arraymancer/issues/19 -# -# proc `=`*[T](dest: var CudaTensor[T]; src: CudaTensor[T]) = -# ## Overloading the assignment operator -# ## It will have value semantics by default -# dest.shape = src.shape -# dest.strides = src.strides -# dest.offset = src.offset -# dest.data = newCudaSeq(src.data.len) -# -# let size = dest.size * sizeof(T) -# -# check cudaMemCpy(dest.get_data_ptr, -# src.get_data_ptr, -# size, -# cudaMemcpyDeviceToDevice) -# echo "Value copied" -# -# proc `=`*[T](dest: var CudaTensor[T]; src: CudaTensor[T]{call}) {.inline.}= -# ## Overloading the assignment operator -# ## Optimized version that knows that -# ## the source CudaTensor is unique and thus don't need to be copied -# system.`=`(result, t) -# echo "Value moved" - proc cuda*[T:SomeReal](t: Tensor[T]): CudaTensor[T] {.noInit.}= ## Convert a tensor on Cpu to a tensor on a Cuda device. # Note: due to modifying the defaultStream global var for async copy @@ -93,7 +51,7 @@ proc cuda*[T:SomeReal](t: Tensor[T]): CudaTensor[T] {.noInit.}= result = newCudaTensor[T](t.shape) # TODO: avoid reordering rowMajor tensors. This is only needed for inplace operation in CUBLAS. - let contig_t = t.unsafeContiguous(colMajor, force = true) + let contig_t = t.asContiguous(colMajor, force = true) let size = csize(result.size * sizeof(T)) # For host to device we use non-blocking copy @@ -113,9 +71,9 @@ proc cpu*[T:SomeReal](t: CudaTensor[T]): Tensor[T] {.noSideEffect, noInit.}= result.shape = t.shape result.strides = t.strides result.offset = t.offset - result.data = newSeqUninit[T](t.data.len) # We copy over all the memory allocated + result.data = newSeqUninit[T](t.storage.Flen) # We copy over all the memory allocated - let size = csize(t.data.len * sizeof(T)) + let size = csize(t.storage.Flen * sizeof(T)) check cudaMemCpy( result.get_data_ptr, t.get_data_ptr, diff --git a/src/tensor/operators_blas_l1_cuda.nim b/src/tensor/operators_blas_l1_cuda.nim index 4a5af0897..95e806fb7 100644 --- a/src/tensor/operators_blas_l1_cuda.nim +++ b/src/tensor/operators_blas_l1_cuda.nim @@ -87,7 +87,7 @@ proc `*=`*[T:SomeReal](t: var CudaTensor[T]; a: T) {.inline.}= # We multiply all elements of the CudaTensor regardless of shape/strides # So this operation can be applied to tensors of all ranks. 
# Hence we use the whole allocated length and a stride of 1 - cublas_scal(t.data.len, a, t.get_data_ptr, 1) + cublas_scal(t.storage.Flen, a, t.get_data_ptr, 1) proc `*`*[T:SomeReal](a: T, t: CudaTensor[T]): CudaTensor[T] {.noInit, inline.}= ## CudaTensor multiplication by a scalar diff --git a/src/tensor/operators_broadcasted.nim b/src/tensor/operators_broadcasted.nim index b743c77dd..7a86293c3 100644 --- a/src/tensor/operators_broadcasted.nim +++ b/src/tensor/operators_broadcasted.nim @@ -23,12 +23,12 @@ import ./data_structure, proc `.+`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a + tmp_b proc `.-`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a - tmp_b proc `.*`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit.} = @@ -36,21 +36,21 @@ proc `.*`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit.} = ## ## And broadcasted element-wise multiplication. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x * y) proc `./`*[T: SomeInteger](a, b: Tensor[T]): Tensor[T] {.noInit.} = ## Tensor element-wise division for integer numbers. ## ## And broadcasted element-wise division. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x div y) proc `./`*[T: SomeReal](a, b: Tensor[T]): Tensor[T] {.noInit.} = ## Tensor element-wise division for real numbers. ## ## And broadcasted element-wise division. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x / y ) # ############################################## @@ -62,7 +62,7 @@ proc `.+=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x + y) proc `.-=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = @@ -71,7 +71,7 @@ proc `.-=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x - y) proc `.*=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = @@ -80,7 +80,7 @@ proc `.*=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x * y) proc `./=`*[T: SomeInteger](a: var Tensor[T], b: Tensor[T]) = @@ -89,7 +89,7 @@ proc `./=`*[T: SomeInteger](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x div y) proc `./=`*[T: SomeReal](a: var Tensor[T], b: Tensor[T]) = @@ -98,7 +98,7 @@ proc `./=`*[T: SomeReal](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. 
# shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x / y) diff --git a/src/tensor/operators_broadcasted_cuda.nim b/src/tensor/operators_broadcasted_cuda.nim index a52bb5b99..e1e5cae32 100644 --- a/src/tensor/operators_broadcasted_cuda.nim +++ b/src/tensor/operators_broadcasted_cuda.nim @@ -32,12 +32,12 @@ cuda_binary_glue("cuda_Div", "DivOp", cuda_Div) proc `.+`*[T: SomeReal](a, b: CudaTensor[T]): CudaTensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a + tmp_b proc `.-`*[T: SomeReal](a, b: CudaTensor[T]): CudaTensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a - tmp_b @@ -46,7 +46,7 @@ proc `.*`*[T: SomeReal](a,b: CudaTensor[T]): CudaTensor[T] {.noInit.} = ## ## And broadcasted element-wise multiplication. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = newCudaTensor[T](tmp_a.shape) cuda_binary_call(cuda_Mul, result, tmp_a, tmp_b) @@ -54,7 +54,7 @@ proc `.*`*[T: SomeReal](a,b: CudaTensor[T]): CudaTensor[T] {.noInit.} = proc `./`*[T: SomeReal](a,b: CudaTensor[T]): CudaTensor[T] {.noInit.} = ## CudaTensor substraction - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = newCudaTensor[T](tmp_a.shape) cuda_binary_call(cuda_Div, result, tmp_a, tmp_b) @@ -71,7 +71,7 @@ proc `.+=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) a += tmp_b proc `.-=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = @@ -80,7 +80,7 @@ proc `.-=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) a -= tmp_b proc `.*=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = @@ -89,7 +89,7 @@ proc `.*=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) cuda_assign_call(cuda_mMulOp, a, tmp_b) proc `./=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = @@ -98,7 +98,7 @@ proc `./=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) cuda_assign_call(cuda_mDivOp, a, tmp_b) # ############################################## diff --git a/src/tensor/optim_move.nim b/src/tensor/optim_move.nim deleted file mode 100644 index 2a10075fd..000000000 --- a/src/tensor/optim_move.nim +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 Mamy André-Ratsimbazafy -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import ./data_structure, - ./shapeshifting - - -# TODO, find a way to test that that moves works properly (global counter for testing?) - -template asContiguous*[T](t: Tensor[T]{call}, layout: OrderType = rowMajor, force: bool = false): Tensor[T] = - ## Transform a tensor with general striding to a Tensor with contiguous layout. - ## - ## By default tensor will be rowMajor. - ## - ## By default nothing is done if the tensor is already contiguous (C Major or F major) - ## The "force" parameter can force re-ordering to a specific layout - ## - ## This is a move optimization for function chaining to avoid copying value returned by the previous function - - unsafeContiguous(t, layout, force) - -template permute*[T](t: Tensor[T]{call}, dims: varargs[int]): Tensor[T] = - ## Permute dimensions of a tensors - ## Input: - ## - a tensor - ## - the new dimension order - ## Returns: - ## - a tensor with re-order dimension - ## Usage: - ## .. code:: nim - ## a.permute(0,2,1) # dim 0 stays at 0, dim 1 becomes dim 2 and dim 2 becomes dim 1 - ## - ## This is a move optimization for function chaining to avoid copying value returned by the previous function - unsafePermute(t, dims) \ No newline at end of file diff --git a/src/tensor/optim_ops_fusion.nim b/src/tensor/optim_ops_fusion.nim index c90519e74..04eec4405 100644 --- a/src/tensor/optim_ops_fusion.nim +++ b/src/tensor/optim_ops_fusion.nim @@ -1,4 +1,4 @@ -# Copyright 2017 Mamy André-Ratsimbazafy +# Copyright 2017 the Arraymancer contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -112,19 +112,3 @@ template rewriteToTensorReshape*{reshape(toTensor(oa, dummy_bugfix), shape)}( ## Operation fusion leverage the Nim compiler and should not be called explicitly. toTensorReshape(oa, shape, dummy_bugfix) -proc unsafeToTensorReshape*[T](data: seq[T], shape: varargs[int]): Tensor[T] {.noSideEffect.} = - ## Fuse unsafeToTensor and unsafeReshape in one operation - - when compileOption("boundChecks"): - check_nested_elements(shape.toMetadataArray, data.len) - - tensorCpu(shape, result) - shallowCopy(result.data, data) - -template rewriteUnsafeToTensorReshape*{unsafeReshape(unsafeToTensor(s), shape)}( - s: seq, - shape: varargs[int]): auto = - ## Fuse ``sequence.unsafeToTensor().unsafeReshape(new_shape)`` into a single operation. - ## - ## Operation fusion leverage the Nim compiler and should not be called explicitly. - unsafeToTensorReshape(s, shape, dummy_bugfix) diff --git a/src/tensor/private/p_accessors.nim b/src/tensor/private/p_accessors.nim index d5b8e69ec..5a4d4343b 100644 --- a/src/tensor/private/p_accessors.nim +++ b/src/tensor/private/p_accessors.nim @@ -98,8 +98,9 @@ type template initStridedIteration*(coord, backstrides, iter_pos: untyped, t, iter_offset, iter_size: typed): untyped = ## Iterator init var iter_pos = 0 - var coord {.noInit.}: array[MAXRANK, int] - var backstrides {.noInit.}: array[MAXRANK, int] + withMemoryOptimHints() # MAXRANK = 8, 8 ints = 64 Bytes, cache line = 64 Bytes --> profit ! 
diff --git a/src/tensor/private/p_accessors.nim b/src/tensor/private/p_accessors.nim
index d5b8e69ec..5a4d4343b 100644
--- a/src/tensor/private/p_accessors.nim
+++ b/src/tensor/private/p_accessors.nim
@@ -98,8 +98,9 @@ type
template initStridedIteration*(coord, backstrides, iter_pos: untyped, t, iter_offset, iter_size: typed): untyped =
  ## Iterator init
  var iter_pos = 0
-  var coord {.noInit.}: array[MAXRANK, int]
-  var backstrides {.noInit.}: array[MAXRANK, int]
+  withMemoryOptimHints() # MAXRANK = 8, 8 ints = 64 Bytes, cache line = 64 Bytes --> profit!
+  var coord {.align64, noInit.}: array[MAXRANK, int]
+  var backstrides {.align64, noInit.}: array[MAXRANK, int]
  for i in 0.. x+1) # out of place
  check: tmp1 == t2
-  var tmp2 = t[_,2]
+  var tmp2 = t[_,2].clone
  proc plus_one[T](x: var T) = x += 1
  tmp2.apply(plus_one) # in-place
diff --git a/tests/tensor/test_operators_blas.nim b/tests/tensor/test_operators_blas.nim
index f25d2239e..a6b780b77 100644
--- a/tests/tensor/test_operators_blas.nim
+++ b/tests/tensor/test_operators_blas.nim
@@ -269,9 +269,9 @@ suite "BLAS (Basic Linear Algebra Subprograms)":
  # [-56, -2]] * [-91]]
  # http://www.calcul.com/show/calculator/matrix-multiplication?matrix1=[[%2253%22,%22-70%22],[%22-56%22,%22-2%22]]&matrix2=[[%2269%22],[%2281%22]]&operator=*
-  let b2 = b.unsafeSlice(4..2|-2, 3..1|-2)
+  let b2 = b[4..2|-2, 3..1|-2]
-  let u2 = u.unsafeSlice(2..0|-2)
+  let u2 = u[2..0|-2]
  check: b2*u2 == [10027, -3682].toTensor()
@@ -282,7 +282,7 @@
-  let b3 = b.unsafeSlice(2..3, 3..1|-2)
+  let b3 = b[2..3, 3..1|-2]
  check: b3*u2 == [-3682, -4783].toTensor
diff --git a/tests/tensor/test_operators_blas_cuda.nim b/tests/tensor/test_operators_blas_cuda.nim
index bea7acd7d..8c4c19180 100644
--- a/tests/tensor/test_operators_blas_cuda.nim
+++ b/tests/tensor/test_operators_blas_cuda.nim
@@ -219,7 +219,7 @@ suite "CUDA CuBLAS backend (Basic Linear Algebra Subprograms)":
  test "Addition-Subtraction - slices":
    let a = @[@[1.0,2,3],@[4.0,5,6], @[7.0,8,9]].toTensor().cuda
-    let a_t = a.unsafeTranspose()
+    let a_t = a.transpose()
    check: (a[0..1, 0..1] + a_t[0..1, 0..1]).cpu == [[2.0, 6], [6.0, 10]].toTensor()
    check: (a[1..2, 1..2] - a_t[1..2, 1..2]).cpu == [[0.0, -2], [2.0, 0]].toTensor()
diff --git a/tests/tensor/test_shapeshifting.nim b/tests/tensor/test_shapeshifting.nim
index d737533c7..03fb2ced2 100644
--- a/tests/tensor/test_shapeshifting.nim
+++ b/tests/tensor/test_shapeshifting.nim
@@ -54,7 +54,7 @@ suite "Shapeshifting":
  test "Unsafe reshape":
    block:
      let a = toSeq(1..4).toTensor()
-      var a_view = a.unsafeReshape(2,2)
+      var a_view = a.reshape(2,2)
      check: a_view == [[1,2],[3,4]].toTensor()
      a_view[_, _] = 0
      check: a == [0,0,0,0].toTensor()
@@ -62,10 +62,10 @@
    # on slices
    block:
      # note that 'a' here is a let variable, however
-      # unsafeView and unsafeReshape allow us to
+      # slicing and reshape still allow us to
      # modify its element values
      let a = toSeq(1..4).toTensor()
-      var a_view = a.unsafeSlice(1..2).unsafeReshape(1,2)
+      var a_view = a[1..2].reshape(1,2)
      check: a_view == [[2,3]].toTensor()
      a_view[_, _] = 0
      check: a == [1,0,0,4].toTensor()
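Note: the shapeshifting tests above pin down the new reference semantics. A short sketch distilled from them, with `clone` as the explicit opt-in to a copy:

```nim
import sequtils, arraymancer

let a = toSeq(1..4).toTensor()

# Slicing and reshape now return views that share storage with `a`:
var a_view = a[1..2].reshape(1, 2)
a_view[_, _] = 0
echo a  # [1, 0, 0, 4]: `a` was mutated through the view

# An independent tensor requires an explicit copy:
proc plus_one[T](x: var T) = x += 1
var b = a.clone
b.apply(plus_one)  # in-place on the copy only
echo a             # still [1, 0, 0, 4]
```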
diff --git a/tests/tensor/test_shapeshifting_cuda.nim b/tests/tensor/test_shapeshifting_cuda.nim
index 3017c3254..815da9035 100644
--- a/tests/tensor/test_shapeshifting_cuda.nim
+++ b/tests/tensor/test_shapeshifting_cuda.nim
@@ -27,17 +27,17 @@ suite "CUDA: Shapeshifting":
  # |8 1 6 2 6 6|
  # |2 0 4 3 2 0|
-  let b = a.unsafeContiguous()
+  let b = a.asContiguous()
  check: b.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
  # a is already contiguous, even if wrong layout.
  # Nothing should be done
-  let c = a.unsafeContiguous(colMajor)
+  let c = a.asContiguous(colMajor)
  check: c.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
  # force parameter has been used.
  # Layout will change even if a was contiguous
-  let d = a.unsafeContiguous(colMajor, force = true)
+  let d = a.asContiguous(colMajor, force = true)
  check: d.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
@@ -46,12 +46,12 @@
  check: u.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
  check: u.cpu == [7.0,4,8,1,2,0].toTensor.reshape([3,2])
-  check: u.unsafeContiguous(rowMajor, force=true).cpu.toRawSeq == @[7.0,4,8,1,2,0]
+  check: u.asContiguous(rowMajor, force=true).cpu.toRawSeq == @[7.0,4,8,1,2,0]
  test "Unsafe reshape":
    block:
      let a = toSeq(1..4).toTensor().astype(float).cuda
-      var a_view = a.unsafeReshape(2,2)
+      var a_view = a.reshape(2,2)
      check: a_view.cpu == [[1.0,2],[3.0,4]].toTensor()
      # TODO
@@ -61,10 +61,10 @@
    # on slices
    block:
      # note that 'a' here is a let variable, however
-      # unsafeView and unsafeReshape allow us to
+      # slicing and reshape still allow us to
      # modify its element values
      let a = toSeq(1..4).toTensor().astype(float).cuda
-      var a_view = a[1..2].unsafeReshape(1,2) # a[1..2] == a.unsafeSlice(1..2) for CudaTensors
+      var a_view = a[1..2].reshape(1,2)
      check: a_view.cpu == [[2.0,3]].toTensor()
      # TODO: pending slice assignment
diff --git a/tests/tests_cpu_deprecated.nim b/tests/tests_cpu_deprecated.nim
index 22839e666..0f9dd23f6 100644
--- a/tests/tests_cpu_deprecated.nim
+++ b/tests/tests_cpu_deprecated.nim
@@ -13,12 +13,12 @@
# limitations under the License.

import ../src/arraymancer,
-  ./tensor/test_init_deprecated,
-  ./tensor/test_comparison_deprecated,
-  ./tensor/test_accessors_deprecated,
-  ./tensor/test_accessors_slicer_deprecated,
-  ./tensor/test_display_deprecated,
-  ./tensor/test_operators_blas_deprecated,
-  ./tensor/test_aggregate_deprecated,
-  ./tensor/test_shapeshifting_deprecated,
-  ./tensor/test_ufunc_deprecated
+  ./tensor/deprecated/test_init_deprecated,
+  ./tensor/deprecated/test_comparison_deprecated,
+  ./tensor/deprecated/test_accessors_deprecated,
+  ./tensor/deprecated/test_accessors_slicer_deprecated,
+  ./tensor/deprecated/test_display_deprecated,
+  ./tensor/deprecated/test_operators_blas_deprecated,
+  ./tensor/deprecated/test_aggregate_deprecated,
+  ./tensor/deprecated/test_shapeshifting_deprecated,
+  ./tensor/deprecated/test_ufunc_deprecated
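Note: the CUDA checks above encode the `asContiguous` contract, a no-op on already-contiguous tensors unless `force = true` rewrites the storage order. A minimal CPU sketch of that contract (values illustrative, not taken from the fixtures above):

```nim
import arraymancer

let a = [[1, 2, 3],
         [4, 5, 6]].toTensor   # row-major storage: @[1, 2, 3, 4, 5, 6]

# Already contiguous, so requesting another layout without force does nothing:
let b = a.asContiguous(colMajor)
echo b.toRawSeq   # @[1, 2, 3, 4, 5, 6]

# force = true actually rewrites the storage in the requested order:
let c = a.asContiguous(colMajor, force = true)
echo c.toRawSeq   # @[1, 4, 2, 5, 3, 6]
```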