diff --git a/changelog.md b/changelog.md
index 41e426b50..874da9fe0 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,3 +1,30 @@
+Arraymancer v0.3.0
+==========================
+
+I am very excited to announce the third release of Arraymancer, which includes numerous improvements and breaking changes.
+WARNING: deprecated procs will be removed in a follow-up release within a week, to limit deprecation-warning spam.
+
+Note: due to https://github.com/nim-lang/Nim/issues/6436, deprecation warnings are currently also triggered by the non-deprecated:
+- zeros, ones, newTensor
+
+- **Very** Breaking
+  - Tensors now use reference semantics: `let a = b` will share data by default and copies must be made explicitly.
+  - There is no need to use `unsafe` procs to avoid copies, especially for slices.
+  - The `unsafe` procs are deprecated and will be removed, leading to a smaller and simpler codebase and API/documentation.
+  - Tensors and CudaTensors now work the same way.
+  - Use `clone` to make explicit copies (a short migration sketch appears at the end of this diff).
+  - Arraymancer now works like Numpy and Julia, making it easier to port code.
+  - Unfortunately, it also makes unexpected data sharing harder to debug.
+
+- Deprecated
+  - Version 0.3.1, with all deprecated procs removed, will be released in a week. Due to issue https://github.com/nim-lang/Nim/issues/6436,
+    you will get deprecation warnings even when using non-deprecated procs like `zeros`, `ones` and `newTensor`.
+  - The `newTensor`, `zeros` and `ones` arguments have changed from `zeros([5, 5], int)` to `zeros[int]([5, 5])`.
+  - All `unsafe` procs are now the default behaviour and are deprecated.
+
+
+- Cuda:
+  - Support for convolution forward and backward
 Arraymancer v0.2.0
 Sept. 24, 2017 "The Color of Magic"
diff --git a/src/autograd/ag_accessors.nim b/src/autograd/ag_accessors.nim
index ddc0aaa4e..52c5e7467 100644
--- a/src/autograd/ag_accessors.nim
+++ b/src/autograd/ag_accessors.nim
@@ -21,8 +21,8 @@ template `[]`*[TT](v: Variable[TT], args: varargs[untyped]): Variable[TT] =
   result.tape = v.tape
   result.ancestor = v.ancestor
-  result.value = v.value.unsafeSlice(args)
-  result.grad = v.grad.unsafeSlice(args)
+  result.value = v.value[args]
+  result.grad = v.grad[args]

   result
diff --git a/src/autograd/gates_blas.nim b/src/autograd/gates_blas.nim
index c86241e84..97936630f 100644
--- a/src/autograd/gates_blas.nim
+++ b/src/autograd/gates_blas.nim
@@ -29,8 +29,8 @@ method forward*[TT](self: MatMulGate[TT], a, b: Variable[TT]): Variable[TT] {.in
   result.grad = zeros[getSubType(TT)](result.value.shape)

 method backward*[TT](self: MatMulGate[TT], gradient: TT): SmallDiffs[TT] {.noInit, inline, locks:0.}=
-  result[0] = gradient * self.b.value.unsafeTranspose
-  result[1] = self.a.value.unsafeTranspose * gradient
+  result[0] = gradient * self.b.value.transpose
+  result[1] = self.a.value.transpose * gradient

 proc `*`*[TT](a, b: Variable[TT]): Variable[TT] =
   when compileOption("boundChecks"):
diff --git a/src/autograd/gates_reduce.nim b/src/autograd/gates_reduce.nim
index f28290de4..36d3fcb22 100644
--- a/src/autograd/gates_reduce.nim
+++ b/src/autograd/gates_reduce.nim
@@ -34,7 +34,7 @@ method backward*[TT](self: MeanGate[TT], gradient: TT): SmallDiffs[TT] {.noInit,
   result[0] = gradient / getSubType(TT)(self.a_shape.product) # Conversion to subtype T, oh Higher kinded-types ...
let z_shape = newSeqWith(self.a_shape.len, 1) # We create a shape of 1 dimension that we will expand with broadcast - result[0] = result[0].unsafeReshape(z_shape).unsafeBroadcast(self.a_shape) + result[0] = result[0].reshape(z_shape).broadcast(self.a_shape) proc mean*[TT](a: Variable[TT]): Variable[TT] = when compileOption("boundChecks"): diff --git a/src/nn/activation/relu.nim b/src/nn/activation/relu.nim index c074ae2e3..a8f804070 100644 --- a/src/nn/activation/relu.nim +++ b/src/nn/activation/relu.nim @@ -54,4 +54,4 @@ proc relu*[TT](a: Variable[TT]): Variable[TT] = node.child = result # Caching for backprop - gate.cache = result.value.unsafeView \ No newline at end of file + gate.cache = result.value \ No newline at end of file diff --git a/src/nn/layers/linear.nim b/src/nn/layers/linear.nim index a56dcce0d..ff14aee2a 100644 --- a/src/nn/layers/linear.nim +++ b/src/nn/layers/linear.nim @@ -33,8 +33,8 @@ method forward*[TT](self: LinearGate[TT], a: Variable[TT]): Variable[TT] {.inlin result.grad = zeros_like(result.value) method backward*[TT](self: LinearGate[TT], gradient: TT): SmallDiffs[TT] {.noInit, inline, locks:0.}= - result[0] = self.W.value.unsafeTranspose * gradient # grad w.r.t. x - result[1] = gradient * self.x.value.unsafeTranspose # grad w.r.t. weight + result[0] = self.W.value.transpose * gradient # grad w.r.t. x + result[1] = gradient * self.x.value.transpose # grad w.r.t. weight if not self.b.isNil: result[2] = sum(gradient, axis=0) # grad w.r.t. bias diff --git a/src/nn/loss/sigmoid_cross_entropy.nim b/src/nn/loss/sigmoid_cross_entropy.nim index dc6123d43..5c3a1b5db 100644 --- a/src/nn/loss/sigmoid_cross_entropy.nim +++ b/src/nn/loss/sigmoid_cross_entropy.nim @@ -30,7 +30,7 @@ method forward*[TT](self: SigmoidCrossEntropyLoss[TT], a: Variable[TT], target: result.tape = a.tape # TODO: implement a Scalar[T] concept instead of rewrapping the result into a Tensor - result.value = [sigmoid_cross_entropy(a.value, target)].toTensor.unsafeView + result.value = [sigmoid_cross_entropy(a.value, target)].toTensor result.grad = zeros[getSubType(TT)](1) @@ -44,7 +44,7 @@ proc sigmoid_cross_entropy*[TT](a: Variable[TT], target: TT): Variable[TT] = new gate gate.arity = 1 gate.cache = a - gate.target = target.unsafeView + gate.target = target # Node var node: Node[TT] diff --git a/src/nn_primitives/backend/nnpack_interface.nim b/src/nn_primitives/backend/nnpack_interface.nim index 209d42827..1fd8fdf08 100644 --- a/src/nn_primitives/backend/nnpack_interface.nim +++ b/src/nn_primitives/backend/nnpack_interface.nim @@ -26,8 +26,8 @@ proc nnpack_conv2d*(input, weight, bias: Tensor[float32], padding, stride: Size2 output_width = (2*padding.width + input.nchw_width) - (weight.nchw_width - 1) # Make sure the data is contiguous before passing to nnpack - let input = input.unsafeContiguous() - let weight = weight.unsafeContiguous() + let input = input.asContiguous() + let weight = weight.asContiguous() var bias_nonnil: Tensor[float32] # TODO make bias truly optional and not just a tensor of rank 0 @@ -36,7 +36,7 @@ proc nnpack_conv2d*(input, weight, bias: Tensor[float32], padding, stride: Size2 # Temporary bias filled with zeros just to pass to nnpack bias_nonnil = zeros[float32](output_channels) else: - bias_nonnil = bias.unsafeContiguous() + bias_nonnil = bias.asContiguous() # Prepare tensor that the result will be stored on result = newTensorUninit[float32](input.shape[0], output_channels, output_height, output_width) diff --git a/src/nn_primitives/fallback/conv.nim 
b/src/nn_primitives/fallback/conv.nim index ec657b015..8a558f5d3 100644 --- a/src/nn_primitives/fallback/conv.nim +++ b/src/nn_primitives/fallback/conv.nim @@ -91,19 +91,20 @@ proc im2colgemm_conv2d*[T](input, kernel, bias: Tensor[T], output_height = (input.nchw_height + (2*padding.height) - kernel.nchw_height) div stride.height + 1 output_width = (input.nchw_width + (2*padding.width) - kernel.nchw_width) div stride.width + 1 channels_col = input.nchw_channels * kernel.nchw_height * kernel.nchw_width - kernel_col = kernel.unsafeReshape(output_channels, channels_col) + kernel_col = kernel.reshape(output_channels, channels_col) result = newTensorUninit[T](batch_size, output_channels, output_height, output_width) var input_col = newTensorUninit[T](channels_col, output_height * output_width) var output: Tensor[T] - for i in 0.. 0: - result .+= bias.unsafeUnsqueeze(0) + result .+= bias.unsqueeze(0) proc im2colgemm_conv2d_gradient*[T](input, kernel: Tensor[T], padding: Size2D = (0,0), @@ -119,7 +120,7 @@ proc im2colgemm_conv2d_gradient*[T](input, kernel: Tensor[T], output_width = (input.nchw_width + (2*padding.width) - kernel.nchw_width) div stride.width + 1 output_flatten_size = output_height*output_width channels_col = input.nchw_channels * kernel_size.height * kernel_size.width - kernel_col = kernel.unsafeReshape(output_channels, input.nchw_channels*kernel.nchw_height*kernel.nchw_width) + kernel_col = kernel.reshape(output_channels, input.nchw_channels*kernel.nchw_height*kernel.nchw_width) # Check if grad output shape looks correct assert grad_output.nchw_width == output_width and grad_output.nchw_height == output_height @@ -132,9 +133,9 @@ proc im2colgemm_conv2d_gradient*[T](input, kernel: Tensor[T], for i in 0.. 0: - let gradBiasTensorDesc = newCudnn4DTensorDesc grad_bias.unsafeUnsqueeze(0) + let gradBiasTensorDesc = newCudnn4DTensorDesc grad_bias.unsqueeze(0) check cudnnConvolutionBackwardBias( defaultHandle_cudnn, addr alpha, gradOutputTensorDesc, - gOutput.data.data[], + gOutput.get_offset_ptr, addr beta, gradBiasTensorDesc, - grad_bias.data.data[] + grad_bias.get_offset_ptr ) # TODO squeeze and divide by batch size? 
@@ -143,16 +143,16 @@ proc conv2d_backward*[T: float32](input, kernel, bias: CudaTensor[T], defaultHandle_cudnn, addr alpha, srcTensorDesc, - input.data.data[], + input.get_offset_ptr, gradOutputTensorDesc, - gOutput.data.data[], + gOutput.get_offset_ptr, convDesc, kernel_algo_workspace.algo, kernel_algo_workspace.workspace[], kernel_algo_workspace.sizeInBytes, addr beta, gradKernelDesc, - grad_kernel.data.data[] + grad_kernel.get_offset_ptr ) when defined(debug): @@ -176,14 +176,14 @@ proc conv2d_backward*[T: float32](input, kernel, bias: CudaTensor[T], defaultHandle_cudnn, addr alpha, kernelDesc, - kernel.data.data[], + kernel.get_offset_ptr, gradOutputTensorDesc, - gOutput.data.data[], + gOutput.get_offset_ptr, convDesc, gradInput_algo_workspace.algo, gradInput_algo_workspace.workspace[], gradInput_algo_workspace.sizeInBytes, addr beta, gradInputTensorDesc, - grad_input.data.data[] + grad_input.get_offset_ptr ) diff --git a/src/nn_primitives/nnp_convolution.nim b/src/nn_primitives/nnp_convolution.nim index b7bb4a29e..f092d359d 100644 --- a/src/nn_primitives/nnp_convolution.nim +++ b/src/nn_primitives/nnp_convolution.nim @@ -91,7 +91,7 @@ proc conv2d_backward*[T](input, weight, bias: Tensor[T], # Bias gradient if bias.rank > 0: # TODO make bias truly optional and not just a tensor of rank 0 # TODO: sum over many axes - grad_bias = grad_output.sum(3).sum(2).sum(0).unsafeReshape(bias.shape) + grad_bias = grad_output.sum(3).sum(2).sum(0).reshape(bias.shape) case algorithm: of NNPackAuto: diff --git a/src/nn_primitives/nnp_linear.nim b/src/nn_primitives/nnp_linear.nim index 614fd00fa..f3a94faaf 100644 --- a/src/nn_primitives/nnp_linear.nim +++ b/src/nn_primitives/nnp_linear.nim @@ -34,8 +34,8 @@ proc linear_backward*[T]( cached_tensor, weight, bias: Tensor[T], dW, db: var Tensor[T]): Tensor[T] {.inline.} = - result = weight.unsafeTranspose * gradient - gemm(gradient, cached_tensor.unsafeTranspose, dW) + result = weight.transpose * gradient + gemm(gradient, cached_tensor.transpose, dW) db = sum(gradient, axis=0) # https://mlxai.github.io/2017/01/10/a-modular-approach-to-implementing-fully-connected-neural-networks.html @@ -44,6 +44,6 @@ proc linear_backward*[T]( cached_tensor, weight: Tensor[T], dW: var Tensor[T]): Tensor[T] {.inline.} = - result = weight.unsafeTranspose * gradient - gemm(gradient, cached_tensor.unsafeTranspose, dW) + result = weight.transpose * gradient + gemm(gradient, cached_tensor.transpose, dW) diff --git a/src/nn_primitives/nnp_softmax_cross_entropy.nim b/src/nn_primitives/nnp_softmax_cross_entropy.nim index 39e7ecd11..c1ec458d3 100644 --- a/src/nn_primitives/nnp_softmax_cross_entropy.nim +++ b/src/nn_primitives/nnp_softmax_cross_entropy.nim @@ -97,12 +97,12 @@ proc sparse_softmax_cross_entropy*[T](input: Tensor[T], target: Tensor[int]): T # ∑i(- ti * yi) is either -yi or 0 in the sparse case. 
# Since target holds coordinates: ∑i(- ti * yi) = - yi[ti] for i in 0||(input.shape[1]-1): - let lse = input.unsafeSlice(_,i).logsumexp + let lse = input[_,i].logsumexp when not declared(openmp): - result += lse - input.unsafeSlice(target.unsafeSlice(i), i) + result += lse - input[target[i], i] else: - let tmp = lse - input.unsafeSlice(target.unsafeSlice(i), i) + let tmp = lse - input[target[i], i] {.emit:"#pragma omp atomic".} {.emit:"`result` += `tmp`;".} @@ -140,7 +140,7 @@ proc softmax_cross_entropy_backward*[T]( elif gradient is Tensor: let grad = gradient.data[gradient.offset] - let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).unsafeBroadcast(cached_tensor.shape) + let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).broadcast(cached_tensor.shape) result = map3_inline(cached_tensor, target, axis_max_sumexp): grad * (stable_softmax(x, z.max, z.sumexp) - y) / T(batch_size) @@ -176,8 +176,8 @@ proc sparse_softmax_cross_entropy_backward*[T]( for i, truth_idx in enumerate(target): result[truth_idx, i] = -1 - let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).unsafeBroadcast(cached_tensor.shape) - # let axis_max_sumexp = cached_tensor.classic_max_sumexp(axis = 1).unsafeBroadcast(cached_tensor.shape) + let axis_max_sumexp = cached_tensor.streaming_max_sumexp(axis = 1).broadcast(cached_tensor.shape) + # let axis_max_sumexp = cached_tensor.classic_max_sumexp(axis = 1).broadcast(cached_tensor.shape) apply3_inline(result, cached_tensor, axis_max_sumexp): diff --git a/src/nn_primitives/private/p_logsumexp.nim b/src/nn_primitives/private/p_logsumexp.nim index c9c329e5d..4a0f8f85e 100644 --- a/src/nn_primitives/private/p_logsumexp.nim +++ b/src/nn_primitives/private/p_logsumexp.nim @@ -45,13 +45,13 @@ proc streaming_max_sumexp*[T](t: Tensor[T], axis: int): Tensor[tuple[max:T, sume result = newTensorUninit[tuple[max:T, sumexp: T]](t.shape[axis]) for i in `||`(0, t.shape[axis]-1, "simd"): - result.data[i] = t.unsafeAtAxisIndex(axis, i).streaming_max_sumexp + result.data[i] = t.atAxisIndex(axis, i).streaming_max_sumexp # Reexpand the tensor to be consistent with fold_axis/reduce_axis if axis == 0: - result = result.unsafeUnsqueeze(1) + result = result.unsqueeze(1) else: - result = result.unsafeUnsqueeze(0) + result = result.unsqueeze(0) diff --git a/src/tensor/accessors.nim b/src/tensor/accessors.nim index 35910fa37..ef820e504 100644 --- a/src/tensor/accessors.nim +++ b/src/tensor/accessors.nim @@ -26,22 +26,16 @@ proc atContiguousIndex*[T](t: var Tensor[T], idx: int): var T {.noSideEffect,inl ## i.e. 
as treat the tensor as flattened return t.data[t.getContiguousIndex(idx)] -proc unsafeAtAxisIndex*[T](t: Tensor[T], axis, idx: int): Tensor[T] {.noInit,inline.} = - ## Returns a sliced tensor in the given axis index (unsafe) +proc atAxisIndex*[T](t: Tensor[T], axis, idx: int): Tensor[T] {.noInit,inline.} = + ## Returns a sliced tensor in the given axis index + when compileOption("boundChecks"): check_axis_index(t, axis, idx) - result = t.unsafeView() + result = t result.shape[axis] = 1 result.offset += result.strides[axis]*idx -proc atAxisIndex*[T](t: Tensor[T], axis, idx: int): Tensor[T] {.noInit,inline.} = - ## Returns a sliced tensor in the given axis index - - # As contiguous is called to force a copy of the slice - # otherwise the result would copy the whole parent tensor data - t.unsafeAtAxisIndex(axis, idx).clone() - iterator items*[T](t: Tensor[T]): T {.inline,noSideEffect.} = ## Inline iterator on Tensor values ## @@ -285,8 +279,7 @@ template axisIterator[T](t: Tensor[T], axis, iter_offset, iter_size: int): untyp ## - A slice along the given axis at each iteration. ## ## Note: The slice dimension is not collapsed by default. - ## You can use ``unsafeSqueeze`` to collapse it without copy. - ## In this case ``unsafeSqueeze`` is safe. + ## You can use ``squeeze`` to collapse it. ## ## Usage: ## .. code:: nim @@ -296,7 +289,7 @@ template axisIterator[T](t: Tensor[T], axis, iter_offset, iter_size: int): untyp check_axis_index(t, axis, iter_offset) check_axis_index(t, axis, iter_offset+iter_size-1) - var out_t = t.unsafeAtAxisIndex(axis, iter_offset) + var out_t = t.atAxisIndex(axis, iter_offset) for _ in 0.. 1: withMemoryOptimHints() - var results{.align64.}: array[OMP_MAX_REDUCE_BLOCKS * maxItemsPerCacheLine, type(reduced)] + var results{.align64, noInit.}: array[OMP_MAX_REDUCE_BLOCKS * maxItemsPerCacheLine, type(reduced)] let bsize = size div num_blocks if bsize > 1: diff --git a/src/tensor/comparison.nim b/src/tensor/comparison.nim index 47d04bca5..417ea2423 100644 --- a/src/tensor/comparison.nim +++ b/src/tensor/comparison.nim @@ -39,7 +39,7 @@ proc `.==`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x == y) proc `.!=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -49,7 +49,7 @@ proc `.!=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x != y) proc `.<=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -59,7 +59,7 @@ proc `.<=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x <= y) proc `.<`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -69,7 +69,7 @@ proc `.<`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x < y) proc `.>=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = @@ -79,7 +79,7 @@ proc `.>=`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x >= y) proc `.>`*[T](a, b: 
Tensor[T]): Tensor[bool] {.noInit.} = @@ -89,5 +89,5 @@ proc `.>`*[T](a, b: Tensor[T]): Tensor[bool] {.noInit.} = ## ## Returns: ## - A tensor of boolean - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x > y) \ No newline at end of file diff --git a/src/tensor/data_structure.nim b/src/tensor/data_structure.nim index 9ec329f73..599b9e54c 100644 --- a/src/tensor/data_structure.nim +++ b/src/tensor/data_structure.nim @@ -26,43 +26,87 @@ type Cpu, Cuda + CpuStorage* {.shallow.} [T] = object + ## Opaque data storage for Tensors + ## Currently implemented as a seq with reference semantics (shallow copy on assignment). + ## It may change in the future for a custom memory managed and 64 bit aligned solution. + ## + ## Warning ⚠: + ## Do not use Fdata directly, direct access will be removed in 0.4.0. + + # `Fdata` will be transformed into an opaque type once `unsafeToTensorReshape` is removed. + Fdata*: seq[T] + Tensor*[T] = object ## Tensor data structure stored on Cpu ## - ``shape``: Dimensions of the tensor ## - ``strides``: Numbers of items to skip to get the next item along a dimension. - ## - ``offset``: Offset to get the first item of the Tensor. Note: offset can be negative, in particular for slices. - ## - ``data``: A sequence that holds the actual data + ## - ``offset``: Offset to get the first item of the tensor. Note: offset can be negative, in particular for slices. + ## - ``storage``: An opaque data storage for the tensor ## Fields are public so that external libraries can easily construct a Tensor. + ## You can use ``.data`` to access the opaque data storage. + ## + ## Warning ⚠: + ## Assignment ```var a = b``` does not copy the data. Data modification on one tensor will be reflected on the other. + ## However modification on metadata (shape, strides or offset) will not affect the other tensor. + ## Explicit copies can be made with ``clone``: ```var a = b.clone``` shape*: MetadataArray strides*: MetadataArray offset*: int - data*: seq[T] # Perf note: seq are always deep copied on "var" assignement. + storage*: CpuStorage[T] - CudaSeq* [T: SomeReal] = object - ## Seq-like structure on the Cuda backend. +type + CudaStorage*[T: SomeReal] = object + ## Opaque seq-like structure for storage on the Cuda backend. + ## + ## Nim garbage collector will automatically ask cuda to clear GPU memory if data becomes unused. ## - ## Nim garbage collector will automatically ask cuda to clear GPU memory if ``data`` becomes unused. - len*: int - data*: ref[ptr UncheckedArray[T]] + # TODO: Forward declaring this and making this completely private prevent assignment in newCudaStorage from working + Flen*: int + Fdata*: ptr UncheckedArray[T] + Fref_tracking*: ref[ptr UncheckedArray[T]] # We keep ref tracking for the GC in a separate field to avoid double indirection. CudaTensor*[T: SomeReal] = object ## Tensor data structure stored on Nvidia GPU (Cuda) - ## - ``shape``: Dimensions of the tensor + ## - ``shape``: Dimensions of the CudaTensor ## - ``strides``: Numbers of items to skip to get the next item along a dimension. - ## - ``offset``: Offset to get the first item of the Tensor. Note: offset can be negative, in particular for slices. - ## - ``data``: A cuda seq-like object that points to the data location - ## Note: currently ``=`` assignement for CudaTensor does not copy. Both CudaTensors will share a view of the same data location. - ## Modifying the data in one will modify the data in the other. 
+ ## - ``offset``: Offset to get the first item of the CudaTensor. Note: offset can be negative, in particular for slices. + ## - ``storage``: An opaque data storage for the CudaTensor ## - ## In the future CudaTensor will leverage Nim compiler to automatically - ## copy if a memory location would be used more than once in a mutable manner. + ## Warning ⚠: + ## Assignment ```var a = b``` does not copy the data. Data modification on one CudaTensor will be reflected on the other. + ## However modification on metadata (shape, strides or offset) will not affect the other tensor. + ## Explicit copies can be made with ``clone``: ```var a = b.clone``` shape*: MetadataArray strides*: MetadataArray offset*: int - data*: CudaSeq[T] # Memory on Cuda device will be automatically garbage-collected + storage*: CudaStorage[T] AnyTensor*[T] = Tensor[T] or CudaTensor[T] +# ############### +# Field accessors +# ############### + +proc data*[T](t: Tensor[T]): seq[T] {.inline, noSideEffect, noInit.} = + # Get tensor raw data + # This is intended for library writer + shallowCopy(result, t.storage.Fdata) + +proc data*[T](t: var Tensor[T]): var seq[T] {.inline, noSideEffect, noInit.} = + # Get mutable tensor raw data + # This is intended for library writer + shallowCopy(result, t.storage.Fdata) + +proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}= + # Set tensor raw data + # This is intended for library writer + t.storage.Fdata = s + +# ################ +# Tensor Metadata +# ################ + template rank*(t: AnyTensor): int = ## Input: ## - A tensor @@ -105,7 +149,7 @@ proc shape_to_strides*(shape: MetadataArray, layout: OrderType = rowMajor, resul accum *= shape[i] return -proc is_C_contiguous*(t: AnyTensor): bool {.noSideEffect,inline.}= +proc is_C_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}= ## Check if the tensor follows C convention / is row major var z = 1 for i in countdown(t.shape.high,0): @@ -116,7 +160,7 @@ proc is_C_contiguous*(t: AnyTensor): bool {.noSideEffect,inline.}= z *= t.shape[i] return true -proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect,inline.}= +proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}= ## Check if the tensor follows Fortran convention / is column major var z = 1 for i in 0.. U): Tensor[U] {.deprecated, inline.}= diff --git a/src/tensor/deprecated/init_cpu_deprecated_0_2_0.nim b/src/tensor/deprecated/init_cpu_deprecated_0_3_0.nim similarity index 58% rename from src/tensor/deprecated/init_cpu_deprecated_0_2_0.nim rename to src/tensor/deprecated/init_cpu_deprecated_0_3_0.nim index 9c92f7f21..3d9eb8ffe 100644 --- a/src/tensor/deprecated/init_cpu_deprecated_0_2_0.nim +++ b/src/tensor/deprecated/init_cpu_deprecated_0_3_0.nim @@ -13,6 +13,14 @@ # limitations under the License. +# Deprecated on 2017-09-07 by https://github.com/mratsim/Arraymancer/commit/ea7508c0724a7df7559b68cf8c8470d9ee0d1588 +# First release with deprecated tag: 0.2.0 + +import ../private/p_init_cpu, + ../data_structure, + ../init_cpu, + sequtils + proc newTensor*(shape: openarray[int], T: typedesc): Tensor[T] {.noSideEffect, inline, deprecated.} = ## Creates a new Tensor on Cpu backend ## Input: @@ -44,4 +52,29 @@ proc ones*[T: SomeNumber](shape: openarray[int], typ: typedesc[T]): Tensor[T] {. 
## Result: ## - A one-ed Tensor of the same shape tensorCpu(shape, result) - result.data = newSeqWith(result.size, 1.T) \ No newline at end of file + result.data = newSeqWith(result.size, 1.T) + +proc unsafeView*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit,inline, deprecated.}= + ## DEPRECATED: With the switch to reference semantics, ``unsafe`` is now the default. + ## + ## Input: + ## - A tensor + ## Returns: + ## - A shallow copy. Both tensors share the same memory location. + ## + ## Warning ⚠ + ## Both tensors shares the same memory. Data modification on one will be reflected on the other. + ## However modifying the shape, strides or offset will not affect the other. + result = t + +proc unsafeToTensor*[T: SomeNumber](data: seq[T]): Tensor[T] {.noInit,noSideEffect, deprecated.} = + ## DEPRECATED + ## + ## Convert a seq to a Tensor, sharing the seq data + ## Input: + ## - A seq with the tensor data + ## Result: + ## - A rank 1 tensor with the same size of the input + ## WARNING: result share storage with input + tensorCpu([data.len], result) + shallowCopy(result.data, data) \ No newline at end of file diff --git a/src/tensor/deprecated/init_deprecated_0_1_0.nim b/src/tensor/deprecated/init_deprecated_0_2_0.nim similarity index 95% rename from src/tensor/deprecated/init_deprecated_0_1_0.nim rename to src/tensor/deprecated/init_deprecated_0_2_0.nim index cb578c7ce..3287b4787 100644 --- a/src/tensor/deprecated/init_deprecated_0_1_0.nim +++ b/src/tensor/deprecated/init_deprecated_0_2_0.nim @@ -16,6 +16,13 @@ # init procs will not offer the backend parameter anymore. # Full rationale in the Design_Document on Github. +# Deprecated on 2017-09-07 by https://github.com/mratsim/Arraymancer/commit/58f2aff4fbd670d03d5b9c64cbd0e5467d24037b#diff-687a8a0e558961410c916e5cfe045d97 +# First release with deprecated tag: 0.2.0 + +import ../private/p_init_cpu, + ../data_structure, + ../init_cpu + proc newTensor*(shape: openarray[int], T: typedesc, backend: static[Backend]): auto {.noSideEffect, deprecated.} = ## DEPRECATED - The backend: static[Backend] argument has been deprecated for easier maintenance. ## diff --git a/src/tensor/deprecated/optim_ops_fusion_deprecated_0_3_0.nim b/src/tensor/deprecated/optim_ops_fusion_deprecated_0_3_0.nim new file mode 100644 index 000000000..ad642e45d --- /dev/null +++ b/src/tensor/deprecated/optim_ops_fusion_deprecated_0_3_0.nim @@ -0,0 +1,40 @@ +# Copyright 2017 the Arraymancer contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ../private/p_checks, + ../private/p_init_cpu, + ../data_structure + + +proc unsafeToTensorReshape*[T](data: seq[T], shape: varargs[int]): Tensor[T] {.noSideEffect, deprecated.} = + ## Deprecated + ## + ## Fuse unsafeToTensor and unsafeReshape in one operation + ## + ## With move semantics + reference semantics this is not needed. + # Note: once this is removed, CpuStorage can be changed to not expose Fdata. 
+ + when compileOption("boundChecks"): + check_nested_elements(shape.toMetadataArray, data.len) + + tensorCpu(shape, result) + shallowCopy(result.storage.Fdata, data) + +template rewriteUnsafeToTensorReshape*{unsafeReshape(unsafeToTensor(s), shape)}( + s: seq, + shape: varargs[int]): auto = + ## Fuse ``sequence.unsafeToTensor().unsafeReshape(new_shape)`` into a single operation. + ## + ## Operation fusion leverage the Nim compiler and should not be called explicitly. + unsafeToTensorReshape(s, shape, dummy_bugfix) diff --git a/src/tensor/deprecated/shapeshifting_deprecated_0_3_0.nim b/src/tensor/deprecated/shapeshifting_deprecated_0_3_0.nim new file mode 100644 index 000000000..8cee99554 --- /dev/null +++ b/src/tensor/deprecated/shapeshifting_deprecated_0_3_0.nim @@ -0,0 +1,208 @@ +# Copyright 2017 the Arraymancer contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ../backend/metadataArray, + ../private/p_shapeshifting, + ../private/p_checks, + ../private/p_accessors_macros_write, + ../data_structure, ../init_cpu, ../higher_order + + +proc unsafeTranspose*(t: Tensor): Tensor {.noInit,noSideEffect,inline, deprecated.} = + ## DEPRECATED + ## + ## Transpose a Tensor without copy. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## + ## For N-d Tensor with shape (0, 1, 2 ... n-1) the resulting tensor will have shape (n-1, ... 2, 1, 0) + t.shape.reversed(result.shape) + t.strides.reversed(result.strides) + result.offset = t.offset + shallowCopy(result.data, t.data) + +proc unsafeContiguous*[T](t: Tensor[T], layout: OrderType = rowMajor, force: bool = false): Tensor[T] {.noInit, deprecated.} = + ## DEPRECATED + ## + ## Transform a tensor with general striding to a Tensor with contiguous layout. + ## + ## If the tensor is already contiguous it is returned without copy, underlying data is shared between the input and the output. + ## + ## Warning ⚠: + ## This may be a no-copy operation with result data shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## + ## By default tensor will be rowMajor. + ## + ## By default nothing is done if the tensor is already contiguous (C Major or F major) + ## The "force" parameter can force re-ordering to a specific layout + + let cCont = t.is_C_contiguous + let fCont = t.is_F_contiguous + + if (cCont or fCont) and not force: + return t + elif cCont and layout == rowMajor: + return t + elif fCont and layout == colMajor: + return t + contiguousT(result, t, layout) + +proc unsafeReshape*(t: Tensor, new_shape: varargs[int]): Tensor {.noInit, deprecated.} = + ## DEPRECATED + ## + ## Reshape a tensor without copy. + ## + ## ⚠ Reshaping without copy is only possible on contiguous Tensors + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. 
+ + t.reshape_no_copy(new_shape, result) + result.storage = t.storage + +proc unsafeReshape*(t: Tensor, new_shape: MetadataArray): Tensor {.noInit, deprecated.} = + ## DEPRECATED + ## + ## Reshape a tensor without copy. + ## + ## ⚠ Reshaping without copy is only possible on contiguous Tensors + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + + t.reshape_no_copy(new_shape, result) + result.storage = t.storage + + +proc unsafeBroadcast*[T](t: Tensor[T], shape: varargs[int]): Tensor[T] {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Explicitly broadcast a Tensor to the specified shape. + ## The returned broadcasted Tensor share the underlying data with the input. + ## + ## Dimension(s) of size 1 can be expanded to arbitrary size by replicating + ## values along that dimension. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + result = t + result.broadcastT(shape) + +proc unsafeBroadcast*[T](t: Tensor[T], shape: MetadataArray): Tensor[T] {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Explicitly broadcast a Tensor to the specified shape. + ## The returned broadcasted Tensor share the underlying data with the input. + ## + ## Dimension(s) of size 1 can be expanded to arbitrary size by replicating + ## values along that dimension. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + result = t + result.broadcastT(shape) + +proc unsafeBroadcast2*[T](a, b: Tensor[T]): tuple[a, b: Tensor[T]] {.noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Broadcast 2 tensors so they have compatible shapes for element-wise computations. + ## + ## Tensors in the tuple can be accessed with output.a and output.b + ## + ## The returned broadcasted Tensors share the underlying data with the input. + ## + ## Dimension(s) of size 1 can be expanded to arbitrary size by replicating + ## values along that dimension. + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + + broadcast2T(a,b, result) + + shallowCopy(result.a.data, a.data) + shallowCopy(result.b.data, b.data) + + +proc unsafePermute*(t: Tensor, dims: varargs[int]): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Permute dimensions of a tensors + ## Input: + ## - a tensor + ## - the new dimension order + ## Returns: + ## - a tensor with re-order dimension + ## Usage: + ## .. code:: nim + ## a.permute(0,2,1) # dim 0 stays at 0, dim 1 becomes dim 2 and dim 2 becomes dim 1 + ## + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## A broadcasted tensor should not be modified and only used for computation. + + # TODO: bounds check + result = t + permuteT(result, dims) + +proc unsafeSqueeze*(t: Tensor): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Squeeze tensors. 
For example a Tensor of shape [4,1,3] will become [4,3] + ## Input: + ## - a tensor + ## Returns: + ## - a tensor with singleton dimensions collapsed that share the same underlying storage + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + result = t + result.squeezeT + +proc unsafeSqueeze*(t: Tensor, axis: int): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Collapse the given axis, if the dimension is not 1; it does nothing + ## Input: + ## - a tensor + ## - an axis (dimension) + ## Returns: + ## - a tensor with singleton dimensions collapsed + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + result = t + result.squeezeT(axis) + +proc unsafeUnsqueeze*(t: Tensor, axis: int): Tensor {.noInit,noSideEffect, deprecated.}= + ## DEPRECATED + ## + ## Insert a new axis just before the given axis, increasing the tensor + ## dimension (rank) by 1 + ## - a tensor with that new axis + ## WARNING: result share storage with input + ## This does not guarantee `let` variable immutability + result = t + result.unsqueezeT(axis) \ No newline at end of file diff --git a/src/tensor/deprecated/syntactic_sugar_deprecated_0_3_0.nim b/src/tensor/deprecated/syntactic_sugar_deprecated_0_3_0.nim new file mode 100644 index 000000000..affe00236 --- /dev/null +++ b/src/tensor/deprecated/syntactic_sugar_deprecated_0_3_0.nim @@ -0,0 +1,35 @@ +# Copyright 2017 the Arraymancer contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +template unsafeAt*[T](t: Tensor[T], args: varargs[untyped]): untyped {.deprecated.}= + ## DEPRECATED: use `at` instead. + ## + ## Slice a Tensor and collapse singleton dimension. + ## + ## Data is shared between input and output. + ## Input: + ## - a Tensor + ## - and: + ## - specific coordinates (``varargs[int]``) + ## - or a slice (cf. tutorial) + ## Returns: + ## - a value or a view of the Tensor corresponding to the slice + ## Singleton dimension are collapsed + ## Warning ⚠: + ## This is a no-copy operation, data is shared with the input. + ## This proc does not guarantee that a ``let`` value is immutable. + ## Usage: + ## See the ``[]`` macro + t.unsafeSlice(args).unsafeSqueeze \ No newline at end of file diff --git a/src/tensor/exporting.nim b/src/tensor/exporting.nim index 26eaf3b0c..06a77922b 100644 --- a/src/tensor/exporting.nim +++ b/src/tensor/exporting.nim @@ -33,7 +33,7 @@ proc export_tensor*[T](t: Tensor[T]): ## If the tensor was not contiguous (a slice for example), it is reshaped. 
## Data is exported in C order (last index changes the fastest, column in 2D case) - let contig_t = t.unsafeContiguous + let contig_t = t.asContiguous result.shape = contig_t.shape result.strides = contig_t.strides diff --git a/src/tensor/fallback/naive_l2_gemv.nim b/src/tensor/fallback/naive_l2_gemv.nim index 247d65db6..75df8366b 100644 --- a/src/tensor/fallback/naive_l2_gemv.nim +++ b/src/tensor/fallback/naive_l2_gemv.nim @@ -47,12 +47,12 @@ proc naive_gemv_fallback*[T: SomeInteger]( # Naive implementation: split the matrices along vertical axis - let cont_A = A.unsafeContiguous(rowMajor, force=true) + let cont_A = A.asContiguous(rowMajor, force=true) # if A is C_contiguous (row-major) slices along the row are also contiguous - # so we can use unsafeReshape and avoid allocation inside the for loop + # so we can use reshape and avoid allocation inside the for loop let colA = cont_A.shape[1] var i: int = 0 for ai in cont_A.axis(0): - y[i] += alpha * dot(ai.unsafeReshape(colA), x) + y[i] += alpha * dot(ai.reshape(colA), x) i += 1 \ No newline at end of file diff --git a/src/tensor/higher_order.nim b/src/tensor/higher_order.nim index 5ae6611a7..44cbd7713 100644 --- a/src/tensor/higher_order.nim +++ b/src/tensor/higher_order.nim @@ -48,7 +48,7 @@ template map_inline*[T](t: Tensor[T], op:untyped): untyped = omp_parallel_blocks(block_offset, block_size, dest.size): for i, x {.inject.} in enumerate(t, block_offset, block_size): data[i] = op - dest.unsafeView() + dest template map2_inline*[T, U](t1: Tensor[T], t2: Tensor[U], op:untyped): untyped = when compileOption("boundChecks"): @@ -61,7 +61,7 @@ template map2_inline*[T, U](t1: Tensor[T], t2: Tensor[U], op:untyped): untyped = omp_parallel_blocks(block_offset, block_size, t1.size): for i, x {.inject.}, y {.inject.} in enumerateZip(t1, t2, block_offset, block_size): data[i] = op - dest.unsafeView() + dest template map3_inline*[T, U, V](t1: Tensor[T], t2: Tensor[U], t3: Tensor[V], op:untyped): untyped = when compileOption("boundChecks"): @@ -76,7 +76,7 @@ template map3_inline*[T, U, V](t1: Tensor[T], t2: Tensor[U], t3: Tensor[V], op:u for i, x {.inject.}, y {.inject.}, z {.inject.} in enumerateZip(t1, t2, t3, block_offset, block_size): data[i] = op - dest.unsafeView() + dest template reduce_inline*[T](t: Tensor[T], op: untyped): untyped = var reduced : T @@ -101,17 +101,17 @@ template reduce_axis_inline*[T](t: Tensor[T], reduction_axis: int, op: untyped): var reduced : type(t) let weight = t.size div t.shape[reduction_axis] omp_parallel_reduce_blocks(reduced, block_offset, block_size, t.shape[reduction_axis], weight, op) do: - x = t.atAxisIndex(reduction_axis, block_offset).unsafeView() + x = t.atAxisIndex(reduction_axis, block_offset).clone() do: for y {.inject.} in t.axis(reduction_axis, block_offset, block_size): op - reduced.unsafeView() + reduced template fold_axis_inline*[T](t: Tensor[T], result_type: typedesc, fold_axis: int, op_initial, op_middle, op_final: untyped): untyped = var reduced : result_type let weight = t.size div t.shape[fold_axis] omp_parallel_reduce_blocks(reduced, block_offset, block_size, t.shape[fold_axis], weight, op_final) do: - let y {.inject.} = t.atAxisIndex(fold_axis, block_offset).unsafeView() + let y {.inject.} = t.atAxisIndex(fold_axis, block_offset) op_initial do: for y {.inject.} in t.axis(fold_axis, block_offset, block_size): @@ -119,7 +119,7 @@ template fold_axis_inline*[T](t: Tensor[T], result_type: typedesc, fold_axis: in # If the result is a Tensor, return without copy when reduced is AnyTensor: - 
reduced.unsafeView() + reduced else: reduced diff --git a/src/tensor/init_cpu.nim b/src/tensor/init_cpu.nim index 276783bff..1b63faa50 100644 --- a/src/tensor/init_cpu.nim +++ b/src/tensor/init_cpu.nim @@ -22,20 +22,6 @@ import ../private/[functional, nested_containers, sequninit], random, math -proc unsafeView*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit,inline.}= - ## Input: - ## - A tensor - ## Returns: - ## - A shallow copy. Both tensors share the same memory location. - ## - ## Warning ⚠ - ## Both tensors shares the same memory. Data modification on one will be reflected on the other. - ## However modifying the shape, strides or offset will not affect the other. - result.shape = t.shape - result.strides = t.strides - result.offset = t.offset - shallowCopy(result.data, t.data) - proc newTensorUninit*[T](shape: varargs[int]): Tensor[T] {.noSideEffect,noInit, inline.} = ## Creates a new Tensor on Cpu backend ## Input: @@ -95,16 +81,6 @@ proc toTensor*(s:openarray, dummy_bugfix: static[int] = 0 ): auto {.noSideEffect # TODO: remove 'dummy_bugfix' - https://github.com/nim-lang/Nim/issues/6343 toTensorCpu(s) -proc unsafeToTensor*[T: SomeNumber](data: seq[T]): Tensor[T] {.noInit,noSideEffect.} = - ## Convert a seq to a Tensor, sharing the seq data - ## Input: - ## - A seq with the tensor data - ## Result: - ## - A rank 1 tensor with the same size of the input - ## WARNING: result share storage with input - tensorCpu([data.len], result) - shallowCopy(result.data, data) - proc toTensor*(s:string): auto {.noSideEffect.} = ## Convert a string to a Tensor ## diff --git a/src/tensor/init_cpu_copy.nim b/src/tensor/init_cpu_copy.nim index 205187987..9d0150a0c 100644 --- a/src/tensor/init_cpu_copy.nim +++ b/src/tensor/init_cpu_copy.nim @@ -18,7 +18,7 @@ import ./data_structure, # Unfortunately higher_order depends on init_cpu and "clone" depends on higher_order, so we need an extra file # to deal with circular dependencies -proc clone*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit,inline.}= +proc clone*[T](t: Tensor[T]): Tensor[T] {.noSideEffect,noInit.}= ## Input: ## - A tensor ## Returns: diff --git a/src/tensor/init_cuda.nim b/src/tensor/init_cuda.nim index bf9ea5a4a..23e5e85a0 100644 --- a/src/tensor/init_cuda.nim +++ b/src/tensor/init_cuda.nim @@ -21,26 +21,12 @@ import ../private/sequninit, ./init_cpu, nimcuda/[cuda_runtime_api, driver_types] -proc unsafeView*[T](t: CudaTensor[T]): CudaTensor[T] {.inline,noSideEffect.}= - ## Input: - ## - A CudaTensor - ## Returns: - ## - A shallow copy. - ## - ## Warning ⚠ - ## Both tensors shares the same memory. Data modification on one will be reflected on the other. - ## However modifying the shape, strides or offset will not affect the other. - - # shape and strides fields have value semantics by default - # CudaSeq has ref semantics - system.`=`(result, t) - proc clone*[T](t: CudaTensor[T]): CudaTensor[T] {.noInit.}= ## Clone (deep copy) a CudaTensor. ## Copy will not share its data with the original. ## ## Tensor is copied as is. For example it will not be made contiguous. 
- ## Use `unsafeContiguous` for this case + ## Use `asContiguous` for this case # Note: due to modifying the defaultStream global var for async memcopy # proc cannot be tagged noSideEffect @@ -48,8 +34,8 @@ proc clone*[T](t: CudaTensor[T]): CudaTensor[T] {.noInit.}= result.shape = t.shape result.strides = t.strides result.offset = t.offset - result.data = newCudaSeq[T](t.data.len) - let size = t.data.len * sizeof(T) + result.storage = newCudaStorage[T](t.storage.Flen) + let size = t.storage.Flen * sizeof(T) check cudaMemCpyAsync(result.get_data_ptr, t.get_data_ptr, @@ -57,34 +43,6 @@ proc clone*[T](t: CudaTensor[T]): CudaTensor[T] {.noInit.}= cudaMemcpyDeviceToDevice, defaultStream) # defaultStream is a cudaStream_t global var -# ########################################################### -# Implement value semantics for CudaTensor -# Pending https://github.com/nim-lang/Nim/issues/6348 -# Tracked in https://github.com/mratsim/Arraymancer/issues/19 -# -# proc `=`*[T](dest: var CudaTensor[T]; src: CudaTensor[T]) = -# ## Overloading the assignment operator -# ## It will have value semantics by default -# dest.shape = src.shape -# dest.strides = src.strides -# dest.offset = src.offset -# dest.data = newCudaSeq(src.data.len) -# -# let size = dest.size * sizeof(T) -# -# check cudaMemCpy(dest.get_data_ptr, -# src.get_data_ptr, -# size, -# cudaMemcpyDeviceToDevice) -# echo "Value copied" -# -# proc `=`*[T](dest: var CudaTensor[T]; src: CudaTensor[T]{call}) {.inline.}= -# ## Overloading the assignment operator -# ## Optimized version that knows that -# ## the source CudaTensor is unique and thus don't need to be copied -# system.`=`(result, t) -# echo "Value moved" - proc cuda*[T:SomeReal](t: Tensor[T]): CudaTensor[T] {.noInit.}= ## Convert a tensor on Cpu to a tensor on a Cuda device. # Note: due to modifying the defaultStream global var for async copy @@ -93,7 +51,7 @@ proc cuda*[T:SomeReal](t: Tensor[T]): CudaTensor[T] {.noInit.}= result = newCudaTensor[T](t.shape) # TODO: avoid reordering rowMajor tensors. This is only needed for inplace operation in CUBLAS. - let contig_t = t.unsafeContiguous(colMajor, force = true) + let contig_t = t.asContiguous(colMajor, force = true) let size = csize(result.size * sizeof(T)) # For host to device we use non-blocking copy @@ -113,9 +71,9 @@ proc cpu*[T:SomeReal](t: CudaTensor[T]): Tensor[T] {.noSideEffect, noInit.}= result.shape = t.shape result.strides = t.strides result.offset = t.offset - result.data = newSeqUninit[T](t.data.len) # We copy over all the memory allocated + result.data = newSeqUninit[T](t.storage.Flen) # We copy over all the memory allocated - let size = csize(t.data.len * sizeof(T)) + let size = csize(t.storage.Flen * sizeof(T)) check cudaMemCpy( result.get_data_ptr, t.get_data_ptr, diff --git a/src/tensor/operators_blas_l1_cuda.nim b/src/tensor/operators_blas_l1_cuda.nim index 4a5af0897..95e806fb7 100644 --- a/src/tensor/operators_blas_l1_cuda.nim +++ b/src/tensor/operators_blas_l1_cuda.nim @@ -87,7 +87,7 @@ proc `*=`*[T:SomeReal](t: var CudaTensor[T]; a: T) {.inline.}= # We multiply all elements of the CudaTensor regardless of shape/strides # So this operation can be applied to tensors of all ranks. 
# Hence we use the whole allocated length and a stride of 1 - cublas_scal(t.data.len, a, t.get_data_ptr, 1) + cublas_scal(t.storage.Flen, a, t.get_data_ptr, 1) proc `*`*[T:SomeReal](a: T, t: CudaTensor[T]): CudaTensor[T] {.noInit, inline.}= ## CudaTensor multiplication by a scalar diff --git a/src/tensor/operators_broadcasted.nim b/src/tensor/operators_broadcasted.nim index b743c77dd..7a86293c3 100644 --- a/src/tensor/operators_broadcasted.nim +++ b/src/tensor/operators_broadcasted.nim @@ -23,12 +23,12 @@ import ./data_structure, proc `.+`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a + tmp_b proc `.-`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a - tmp_b proc `.*`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit.} = @@ -36,21 +36,21 @@ proc `.*`*[T: SomeNumber](a, b: Tensor[T]): Tensor[T] {.noInit.} = ## ## And broadcasted element-wise multiplication. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x * y) proc `./`*[T: SomeInteger](a, b: Tensor[T]): Tensor[T] {.noInit.} = ## Tensor element-wise division for integer numbers. ## ## And broadcasted element-wise division. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x div y) proc `./`*[T: SomeReal](a, b: Tensor[T]): Tensor[T] {.noInit.} = ## Tensor element-wise division for real numbers. ## ## And broadcasted element-wise division. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = map2_inline(tmp_a, tmp_b, x / y ) # ############################################## @@ -62,7 +62,7 @@ proc `.+=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x + y) proc `.-=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = @@ -71,7 +71,7 @@ proc `.-=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x - y) proc `.*=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = @@ -80,7 +80,7 @@ proc `.*=`*[T: SomeNumber](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x * y) proc `./=`*[T: SomeInteger](a: var Tensor[T], b: Tensor[T]) = @@ -89,7 +89,7 @@ proc `./=`*[T: SomeInteger](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x div y) proc `./=`*[T: SomeReal](a: var Tensor[T], b: Tensor[T]) = @@ -98,7 +98,7 @@ proc `./=`*[T: SomeReal](a: var Tensor[T], b: Tensor[T]) = ## Only the right hand side tensor can be broadcasted. 
# shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) apply2_inline(a, tmp_b, x / y) diff --git a/src/tensor/operators_broadcasted_cuda.nim b/src/tensor/operators_broadcasted_cuda.nim index a52bb5b99..e1e5cae32 100644 --- a/src/tensor/operators_broadcasted_cuda.nim +++ b/src/tensor/operators_broadcasted_cuda.nim @@ -32,12 +32,12 @@ cuda_binary_glue("cuda_Div", "DivOp", cuda_Div) proc `.+`*[T: SomeReal](a, b: CudaTensor[T]): CudaTensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a + tmp_b proc `.-`*[T: SomeReal](a, b: CudaTensor[T]): CudaTensor[T] {.noInit,inline.} = ## Broadcasted addition for tensors of incompatible but broadcastable shape. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = tmp_a - tmp_b @@ -46,7 +46,7 @@ proc `.*`*[T: SomeReal](a,b: CudaTensor[T]): CudaTensor[T] {.noInit.} = ## ## And broadcasted element-wise multiplication. - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = newCudaTensor[T](tmp_a.shape) cuda_binary_call(cuda_Mul, result, tmp_a, tmp_b) @@ -54,7 +54,7 @@ proc `.*`*[T: SomeReal](a,b: CudaTensor[T]): CudaTensor[T] {.noInit.} = proc `./`*[T: SomeReal](a,b: CudaTensor[T]): CudaTensor[T] {.noInit.} = ## CudaTensor substraction - let (tmp_a, tmp_b) = unsafeBroadcast2(a, b) + let (tmp_a, tmp_b) = broadcast2(a, b) result = newCudaTensor[T](tmp_a.shape) cuda_binary_call(cuda_Div, result, tmp_a, tmp_b) @@ -71,7 +71,7 @@ proc `.+=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) a += tmp_b proc `.-=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = @@ -80,7 +80,7 @@ proc `.-=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) a -= tmp_b proc `.*=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = @@ -89,7 +89,7 @@ proc `.*=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) cuda_assign_call(cuda_mMulOp, a, tmp_b) proc `./=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = @@ -98,7 +98,7 @@ proc `./=`*[T: SomeReal](a: var CudaTensor[T], b: CudaTensor[T]) = ## Only the right hand side tensor can be broadcasted. # shape check done in apply2 proc - let tmp_b = b.unsafeBroadcast(a.shape) + let tmp_b = b.broadcast(a.shape) cuda_assign_call(cuda_mDivOp, a, tmp_b) # ############################################## diff --git a/src/tensor/optim_move.nim b/src/tensor/optim_move.nim deleted file mode 100644 index 2a10075fd..000000000 --- a/src/tensor/optim_move.nim +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 Mamy André-Ratsimbazafy -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import ./data_structure, - ./shapeshifting - - -# TODO, find a way to test that that moves works properly (global counter for testing?) - -template asContiguous*[T](t: Tensor[T]{call}, layout: OrderType = rowMajor, force: bool = false): Tensor[T] = - ## Transform a tensor with general striding to a Tensor with contiguous layout. - ## - ## By default tensor will be rowMajor. - ## - ## By default nothing is done if the tensor is already contiguous (C Major or F major) - ## The "force" parameter can force re-ordering to a specific layout - ## - ## This is a move optimization for function chaining to avoid copying value returned by the previous function - - unsafeContiguous(t, layout, force) - -template permute*[T](t: Tensor[T]{call}, dims: varargs[int]): Tensor[T] = - ## Permute dimensions of a tensors - ## Input: - ## - a tensor - ## - the new dimension order - ## Returns: - ## - a tensor with re-order dimension - ## Usage: - ## .. code:: nim - ## a.permute(0,2,1) # dim 0 stays at 0, dim 1 becomes dim 2 and dim 2 becomes dim 1 - ## - ## This is a move optimization for function chaining to avoid copying value returned by the previous function - unsafePermute(t, dims) \ No newline at end of file diff --git a/src/tensor/optim_ops_fusion.nim b/src/tensor/optim_ops_fusion.nim index c90519e74..04eec4405 100644 --- a/src/tensor/optim_ops_fusion.nim +++ b/src/tensor/optim_ops_fusion.nim @@ -1,4 +1,4 @@ -# Copyright 2017 Mamy André-Ratsimbazafy +# Copyright 2017 the Arraymancer contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -112,19 +112,3 @@ template rewriteToTensorReshape*{reshape(toTensor(oa, dummy_bugfix), shape)}( ## Operation fusion leverage the Nim compiler and should not be called explicitly. toTensorReshape(oa, shape, dummy_bugfix) -proc unsafeToTensorReshape*[T](data: seq[T], shape: varargs[int]): Tensor[T] {.noSideEffect.} = - ## Fuse unsafeToTensor and unsafeReshape in one operation - - when compileOption("boundChecks"): - check_nested_elements(shape.toMetadataArray, data.len) - - tensorCpu(shape, result) - shallowCopy(result.data, data) - -template rewriteUnsafeToTensorReshape*{unsafeReshape(unsafeToTensor(s), shape)}( - s: seq, - shape: varargs[int]): auto = - ## Fuse ``sequence.unsafeToTensor().unsafeReshape(new_shape)`` into a single operation. - ## - ## Operation fusion leverage the Nim compiler and should not be called explicitly. - unsafeToTensorReshape(s, shape, dummy_bugfix) diff --git a/src/tensor/private/p_accessors.nim b/src/tensor/private/p_accessors.nim index d5b8e69ec..5a4d4343b 100644 --- a/src/tensor/private/p_accessors.nim +++ b/src/tensor/private/p_accessors.nim @@ -98,8 +98,9 @@ type template initStridedIteration*(coord, backstrides, iter_pos: untyped, t, iter_offset, iter_size: typed): untyped = ## Iterator init var iter_pos = 0 - var coord {.noInit.}: array[MAXRANK, int] - var backstrides {.noInit.}: array[MAXRANK, int] + withMemoryOptimHints() # MAXRANK = 8, 8 ints = 64 Bytes, cache line = 64 Bytes --> profit ! 
diff --git a/src/tensor/private/p_accessors.nim b/src/tensor/private/p_accessors.nim
index d5b8e69ec..5a4d4343b 100644
--- a/src/tensor/private/p_accessors.nim
+++ b/src/tensor/private/p_accessors.nim
@@ -98,8 +98,9 @@ type
template initStridedIteration*(coord, backstrides, iter_pos: untyped, t, iter_offset, iter_size: typed): untyped =
  ## Iterator init
  var iter_pos = 0
-  var coord {.noInit.}: array[MAXRANK, int]
-  var backstrides {.noInit.}: array[MAXRANK, int]
+  withMemoryOptimHints() # MAXRANK = 8, 8 ints = 64 Bytes, cache line = 64 Bytes --> profit!
+  var coord {.align64, noInit.}: array[MAXRANK, int]
+  var backstrides {.align64, noInit.}: array[MAXRANK, int]
  for i in 0.. x+1) # out of place
  check: tmp1 == t2
-  var tmp2 = t[_,2]
+  var tmp2 = t[_,2].clone
  proc plus_one[T](x: var T) = x += 1
  tmp2.apply(plus_one) # in-place
diff --git a/tests/tensor/test_operators_blas.nim b/tests/tensor/test_operators_blas.nim
index f25d2239e..a6b780b77 100644
--- a/tests/tensor/test_operators_blas.nim
+++ b/tests/tensor/test_operators_blas.nim
@@ -269,9 +269,9 @@ suite "BLAS (Basic Linear Algebra Subprograms)":
  # [-56, -2]] * [-91]]
  # http://www.calcul.com/show/calculator/matrix-multiplication?matrix1=[[%2253%22,%22-70%22],[%22-56%22,%22-2%22]]&matrix2=[[%2269%22],[%2281%22]]&operator=*
-  let b2 = b.unsafeSlice(4..2|-2, 3..1|-2)
+  let b2 = b[4..2|-2, 3..1|-2]
-  let u2 = u.unsafeSlice(2..0|-2)
+  let u2 = u[2..0|-2]
  check: b2*u2 == [10027, -3682].toTensor()
@@ -282,7 +282,7 @@
-  let b3 = b.unsafeSlice(2..3, 3..1|-2)
+  let b3 = b[2..3, 3..1|-2]
  check: b3*u2 == [-3682, -4783].toTensor
diff --git a/tests/tensor/test_operators_blas_cuda.nim b/tests/tensor/test_operators_blas_cuda.nim
index bea7acd7d..8c4c19180 100644
--- a/tests/tensor/test_operators_blas_cuda.nim
+++ b/tests/tensor/test_operators_blas_cuda.nim
@@ -219,7 +219,7 @@ suite "CUDA CuBLAS backend (Basic Linear Algebra Subprograms)":
  test "Addition-Subtraction - slices":
    let a = @[@[1.0,2,3],@[4.0,5,6], @[7.0,8,9]].toTensor().cuda
-    let a_t = a.unsafeTranspose()
+    let a_t = a.transpose()
    check: (a[0..1, 0..1] + a_t[0..1, 0..1]).cpu == [[2.0, 6], [6.0, 10]].toTensor()
    check: (a[1..2, 1..2] - a_t[1..2, 1..2]).cpu == [[0.0, -2], [2.0, 0]].toTensor()
diff --git a/tests/tensor/test_shapeshifting.nim b/tests/tensor/test_shapeshifting.nim
index d737533c7..03fb2ced2 100644
--- a/tests/tensor/test_shapeshifting.nim
+++ b/tests/tensor/test_shapeshifting.nim
@@ -54,7 +54,7 @@ suite "Shapeshifting":
  test "Unsafe reshape":
    block:
      let a = toSeq(1..4).toTensor()
-      var a_view = a.unsafeReshape(2,2)
+      var a_view = a.reshape(2,2)
      check: a_view == [[1,2],[3,4]].toTensor()
      a_view[_, _] = 0
      check: a == [0,0,0,0].toTensor()
@@ -62,10 +62,10 @@
    # on slices
    block:
      # note that 'a' here is a let variable, however
-      # unsafeView and unsafeReshape allow us to
+      # slicing and reshape still allow us to
      # modify its element values
      let a = toSeq(1..4).toTensor()
-      var a_view = a.unsafeSlice(1..2).unsafeReshape(1,2)
+      var a_view = a[1..2].reshape(1,2)
      check: a_view == [[2,3]].toTensor()
      a_view[_, _] = 0
      check: a == [1,0,0,4].toTensor()
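Note: the shapeshifting tests above pin down the new reference semantics. A short sketch distilled from them, with `clone` as the explicit opt-in to a copy:

```nim
import sequtils, arraymancer

let a = toSeq(1..4).toTensor()

# Slicing and reshape now return views that share storage with `a`:
var a_view = a[1..2].reshape(1, 2)
a_view[_, _] = 0
echo a  # [1, 0, 0, 4]: `a` was mutated through the view

# An independent tensor requires an explicit copy:
proc plus_one[T](x: var T) = x += 1
var b = a.clone
b.apply(plus_one)  # in-place on the copy only
echo a             # still [1, 0, 0, 4]
```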
diff --git a/tests/tensor/test_shapeshifting_cuda.nim b/tests/tensor/test_shapeshifting_cuda.nim
index 3017c3254..815da9035 100644
--- a/tests/tensor/test_shapeshifting_cuda.nim
+++ b/tests/tensor/test_shapeshifting_cuda.nim
@@ -27,17 +27,17 @@ suite "CUDA: Shapeshifting":
  # |8 1 6 2 6 6|
  # |2 0 4 3 2 0|
-  let b = a.unsafeContiguous()
+  let b = a.asContiguous()
  check: b.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
  # a is already contiguous, even if wrong layout.
  # Nothing should be done
-  let c = a.unsafeContiguous(colMajor)
+  let c = a.asContiguous(colMajor)
  check: c.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
  # force parameter has been used.
  # Layout will change even if a was contiguous
-  let d = a.unsafeContiguous(colMajor, force = true)
+  let d = a.asContiguous(colMajor, force = true)
  check: d.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
@@ -46,12 +46,12 @@
  check: u.cpu.toRawSeq == @[7.0, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]
  check: u.cpu == [7.0,4,8,1,2,0].toTensor.reshape([3,2])
-  check: u.unsafeContiguous(rowMajor, force=true).cpu.toRawSeq == @[7.0,4,8,1,2,0]
+  check: u.asContiguous(rowMajor, force=true).cpu.toRawSeq == @[7.0,4,8,1,2,0]
  test "Unsafe reshape":
    block:
      let a = toSeq(1..4).toTensor().astype(float).cuda
-      var a_view = a.unsafeReshape(2,2)
+      var a_view = a.reshape(2,2)
      check: a_view.cpu == [[1.0,2],[3.0,4]].toTensor()
      # TODO
@@ -61,10 +61,10 @@
    # on slices
    block:
      # note that 'a' here is a let variable, however
-      # unsafeView and unsafeReshape allow us to
+      # slicing and reshape still allow us to
      # modify its element values
      let a = toSeq(1..4).toTensor().astype(float).cuda
-      var a_view = a[1..2].unsafeReshape(1,2) # a[1..2] == a.unsafeSlice(1..2) for CudaTensors
+      var a_view = a[1..2].reshape(1,2)
      check: a_view.cpu == [[2.0,3]].toTensor()
      # TODO: pending slice assignment
diff --git a/tests/tests_cpu_deprecated.nim b/tests/tests_cpu_deprecated.nim
index 22839e666..0f9dd23f6 100644
--- a/tests/tests_cpu_deprecated.nim
+++ b/tests/tests_cpu_deprecated.nim
@@ -13,12 +13,12 @@
# limitations under the License.

import ../src/arraymancer,
-  ./tensor/test_init_deprecated,
-  ./tensor/test_comparison_deprecated,
-  ./tensor/test_accessors_deprecated,
-  ./tensor/test_accessors_slicer_deprecated,
-  ./tensor/test_display_deprecated,
-  ./tensor/test_operators_blas_deprecated,
-  ./tensor/test_aggregate_deprecated,
-  ./tensor/test_shapeshifting_deprecated,
-  ./tensor/test_ufunc_deprecated
+  ./tensor/deprecated/test_init_deprecated,
+  ./tensor/deprecated/test_comparison_deprecated,
+  ./tensor/deprecated/test_accessors_deprecated,
+  ./tensor/deprecated/test_accessors_slicer_deprecated,
+  ./tensor/deprecated/test_display_deprecated,
+  ./tensor/deprecated/test_operators_blas_deprecated,
+  ./tensor/deprecated/test_aggregate_deprecated,
+  ./tensor/deprecated/test_shapeshifting_deprecated,
+  ./tensor/deprecated/test_ufunc_deprecated
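Note: the CUDA checks above encode the `asContiguous` contract, a no-op on already-contiguous tensors unless `force = true` rewrites the storage order. A minimal CPU sketch of that contract (values illustrative, not taken from the fixtures above):

```nim
import arraymancer

let a = [[1, 2, 3],
         [4, 5, 6]].toTensor   # row-major storage: @[1, 2, 3, 4, 5, 6]

# Already contiguous, so requesting another layout without force does nothing:
let b = a.asContiguous(colMajor)
echo b.toRawSeq   # @[1, 2, 3, 4, 5, 6]

# force = true actually rewrites the storage in the requested order:
let c = a.asContiguous(colMajor, force = true)
echo c.toRawSeq   # @[1, 4, 2, 5, 3, 6]
```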