diff --git a/docs/source/sumtensor.rst b/docs/source/sumtensor.rst index 4851133c..fcaf8d00 100644 --- a/docs/source/sumtensor.rst +++ b/docs/source/sumtensor.rst @@ -1,2 +1,8 @@ pyttb.sumtensor -======================= \ No newline at end of file +----------------------- + +.. autoclass:: pyttb.sumtensor + :members: + :special-members: + :exclude-members: __dict__, __weakref__, __slots__, __init__ + :show-inheritance: \ No newline at end of file diff --git a/docs/source/tensor_classes.rst b/docs/source/tensor_classes.rst index 70d93643..f9c17802 100644 --- a/docs/source/tensor_classes.rst +++ b/docs/source/tensor_classes.rst @@ -6,6 +6,7 @@ Tensor Classes ktensor.rst sptensor.rst + sumtensor.rst tensor.rst ttensor.rst tenmat.rst diff --git a/docs/source/tutorial/class_sumtensor.ipynb b/docs/source/tutorial/class_sumtensor.ipynb new file mode 100644 index 00000000..a554999d --- /dev/null +++ b/docs/source/tutorial/class_sumtensor.ipynb @@ -0,0 +1,346 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a2683b4e", + "metadata": {}, + "source": [ + "# Sum of Structured Tensors\n", + "```\n", + "Copyright 2022 National Technology & Engineering Solutions of Sandia,\n", + "LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the\n", + "U.S. Government retains certain rights in this software.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "6c96f0b4", + "metadata": {}, + "source": [ + "When certain operations are performed on a tensor which is formed as a sum of tensors, it can be beneficial to avoid explicitly forming the sum. For example, if a tensor is formed as a sum of a low rank tensor and a sparse tensor, the structure of the summands can make storage, decomposition and operations with other tensors significantly more efficient. A `sumtensor` exploits this structure. Here we explain the basics of defining and using sumtensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c2ad211", + "metadata": {}, + "outputs": [], + "source": [ + "import pyttb as ttb\n", + "import numpy as np\n", + "import pickle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a808d286", + "metadata": {}, + "outputs": [], + "source": [ + "def estimate_mem_usage(variable) -> int:\n", + " \"\"\"\n", + " Python variables contain references to memory.\n", + " Quickly estimate memory usage of custom types.\n", + " \"\"\"\n", + " return len(pickle.dumps(variable))" + ] + }, + { + "cell_type": "markdown", + "id": "d169e48b", + "metadata": {}, + "source": [ + "## Creating sumtensors\n", + "A sumtensor `T` can only be delared as a sum of same-shaped tensors T1, T2,...,TN. The summand tensors are stored internally, which define the \"parts\" of the `sumtensor`. The parts of a `sumtensor` can be (dense) tensors (`tensor`), sparse tensors (` sptensor`), Kruskal tensors (`ktensor`), or Tucker tensors (`ttensor`). An example of the use of the sumtensor constructor follows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75797e16", + "metadata": {}, + "outputs": [], + "source": [ + "T1 = ttb.tenones((3, 3, 3))\n", + "T2 = ttb.sptensor(\n", + " subs=np.array([[0, 0, 0], [1, 1, 1], [2, 2, 1], [1, 0, 0]]),\n", + " vals=np.ones((4, 1)),\n", + " shape=(3, 3, 3),\n", + ")\n", + "T = ttb.sumtensor([T1, T2])\n", + "print(T)" + ] + }, + { + "cell_type": "markdown", + "id": "0a17f6a8", + "metadata": {}, + "source": [ + "## A Magnitude Example\n", + "For large-scale problems, the `sumtensor` class may make the difference as to whether or not a tensor can be stored in memory. Consider the following example, where $\\mathcal{T}$ is of size $1000 x 1000 x 1000$, formed from the sum of a `ktensor` and an `sptensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1215e3d2", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(0)\n", + "X1 = np.random.rand(50, 3)\n", + "X2 = np.random.rand(50, 3)\n", + "X3 = np.random.rand(50, 3)\n", + "K = ttb.ktensor([X1, X2, X3], np.ones((3,)), copy=False)\n", + "S = ttb.sptenrand((50, 50, 50), 1e-100)\n", + "\n", + "ST = ttb.sumtensor([K, S])\n", + "TT = ST.full()\n", + "print(\n", + " f\"Size of sumtensor: {estimate_mem_usage(ST)}\\n\"\n", + " f\"Size of tensor: {estimate_mem_usage(TT)}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9650dccc", + "metadata": {}, + "source": [ + "## Further examples of the sumtensor constructor\n", + "We can declare an empty sumtensor, with no parts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36ea6eb5", + "metadata": {}, + "outputs": [], + "source": [ + "P = ttb.sumtensor()\n", + "print(P)" + ] + }, + { + "cell_type": "markdown", + "id": "346374a7", + "metadata": {}, + "source": [ + "`sumtensor` also supports a copy constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f309064", + "metadata": {}, + "outputs": [], + "source": [ + "S = P.copy()\n", + "print(S)" + ] + }, + { + "cell_type": "markdown", + "id": "c28b7cc6", + "metadata": {}, + "source": [ + "## Ndims and shape for dimensions of a sumtensor\n", + "For a given `sumtensor`, `ndims` returns the number of modes and `shape` returns the shape in each dimension of the `sumtensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e484181", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Ndims: {T.ndims}\\n\" f\"Shape: {T.shape}\")" + ] + }, + { + "cell_type": "markdown", + "id": "ec82fc57", + "metadata": {}, + "source": [ + "## Use full to convert sumtensor to a dense tensor\n", + "The `full` method can convert all the parts of a `sumtensor` to a dense tensor. Note that for large tensors, this can use a large amount of memory to expand then sum the parts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d247884d", + "metadata": {}, + "outputs": [], + "source": [ + "print(T.full())" + ] + }, + { + "cell_type": "markdown", + "id": "b1cfbffa", + "metadata": {}, + "source": [ + "## Use double to convert to a numpy array\n", + "The `double` method can convert the parts of a `sumtensor` to a dense numpy array. Similar warnings for memory usages as `full`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be2ba03", + "metadata": {}, + "outputs": [], + "source": [ + "print(T.double())" + ] + }, + { + "cell_type": "markdown", + "id": "bd9f0550", + "metadata": {}, + "source": [ + "## Matricized Khatri-Rao product of a sumtensor\n", + "The `mttkrp` method computes the Khatri-Rao product of a matricized tensor and `sumtensor`. The required arguments are:\n", + "* A list of matrices (or a `ktensor`)\n", + "* A mode n\n", + "\n", + "The list of matrices must consist of m matrices, where m is the number of modes in the `sumtensor`. The number of columns in all matrices should be the same and the number of rows of matrix i should match the dimension of the `sumtensor` shape in mode i. For more details see the documentation of `tensor.mttkrp`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dd198fa", + "metadata": {}, + "outputs": [], + "source": [ + "matrices = [np.eye(3), np.ones((3, 3)), np.random.rand(3, 3)]\n", + "n = 1\n", + "T.mttkrp(matrices, n)" + ] + }, + { + "cell_type": "markdown", + "id": "dd35fc03", + "metadata": {}, + "source": [ + "## Innerproducts of sumtensors\n", + "The `innerprod` method computes the inner product of a `sumtensors` parts with other tensor types." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfa6d728", + "metadata": {}, + "outputs": [], + "source": [ + "S = ttb.sptensor(\n", + " subs=np.array([[0, 0, 0], [1, 1, 1], [2, 2, 1], [1, 0, 0]]),\n", + " vals=np.ones((4, 1)),\n", + " shape=(3, 3, 3),\n", + ")\n", + "T.innerprod(S)" + ] + }, + { + "cell_type": "markdown", + "id": "802902bb", + "metadata": {}, + "source": [ + "## Norm compatibility interface\n", + "The `norm` method just returns 0 and issues a warning. Norm cannot be distributed, but some algorithms access the norm for verbose details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f945dea9", + "metadata": {}, + "outputs": [], + "source": [ + "T.norm()" + ] + }, + { + "cell_type": "markdown", + "id": "4647329a", + "metadata": {}, + "source": [ + "## Use CP-ALS with sumtensor\n", + "One of the primary motivations for defining the `sumtensor` class is for efficient decomposition. In particular, when trying to find a CP decomposition of a tensor using alternating least squares, the subproblems can be efficiently created and solved using mttkrp and innerprod. Both of these operations can be performed more efficiently by exploiting extra structure in the tensors which form the sum, so the performance of `cp_als` is also improved. Consider the following example, where a cp_als is run on a sumtensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d980252", + "metadata": {}, + "outputs": [], + "source": [ + "result, _, _ = ttb.cp_als(T, 2, maxiters=10)\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "b93b33f9", + "metadata": {}, + "source": [ + "It follows that in cases where $\\mathcal{T}$ is too large for its full expansion to be stored in memory, we may still be able find a CP decomposition by exploiting the sumtensor structure.\n", + "\n", + "_Note_ that the fit returned by `cp_als` is not correct for `sumtensor`, because the norm operation is not supported." + ] + }, + { + "cell_type": "markdown", + "id": "3b3793eb", + "metadata": {}, + "source": [ + "## Addition with sumtensors\n", + "Sumtensors can be added to any other type of tensor. The result is a new `sumtensor` with the tensor appended to the parts of the original `sumtensor`. Note that the tensor is always appended, despite the order of the operation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3514aa4b", + "metadata": {}, + "outputs": [], + "source": [ + "# Equivalent additions despite the order\n", + "print(f\"T+S:\\n{T+S}\\n\")\n", + "print(f\"S+T:\\n{S+T}\")" + ] + }, + { + "cell_type": "markdown", + "id": "bd401c2d", + "metadata": {}, + "source": [ + "## Accessing sumtensor parts\n", + "Subscripted reference can be used to access individual parts of the `sumtensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47938e4f", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Part 0:\\n{T.parts[0]}\\n\\n\" f\"Part 1:\\n{T.parts[1]}\")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyttb/cp_als.py b/pyttb/cp_als.py index ca263031..c7ae2c67 100644 --- a/pyttb/cp_als.py +++ b/pyttb/cp_als.py @@ -12,7 +12,7 @@ def cp_als( # noqa: PLR0912,PLR0913,PLR0915 - input_tensor: Union[ttb.tensor, ttb.sptensor, ttb.ttensor], + input_tensor: Union[ttb.tensor, ttb.sptensor, ttb.ttensor, ttb.sumtensor], rank: int, stoptol: float = 1e-4, maxiters: int = 1000, @@ -156,6 +156,9 @@ def cp_als( # noqa: PLR0912,PLR0913,PLR0915 ) init = ttb.ktensor(factor_matrices) elif isinstance(init, str) and init.lower() == "nvecs": + assert not isinstance( + input_tensor, ttb.sumtensor + ), "Sumtensor doesn't support nvecs" factor_matrices = [] for n in range(N): factor_matrices.append(input_tensor.nvecs(n, rank)) diff --git a/pyttb/ktensor.py b/pyttb/ktensor.py index 46f8537c..986ca688 100644 --- a/pyttb/ktensor.py +++ b/pyttb/ktensor.py @@ -24,7 +24,13 @@ from typing_extensions import Self import pyttb as ttb -from pyttb.pyttb_utils import isrow, isvector, tt_dimscheck, tt_ind2sub +from pyttb.pyttb_utils import ( + get_mttkrp_factors, + isrow, + isvector, + tt_dimscheck, + tt_ind2sub, +) class ktensor: @@ -1080,7 +1086,7 @@ def mask(self, W: Union[ttb.tensor, ttb.sptensor]) -> np.ndarray: vals = vals + tmpvals return vals - def mttkrp(self, U: List[np.ndarray], n: int) -> np.ndarray: + def mttkrp(self, U: Union[ktensor, List[np.ndarray]], n: int) -> np.ndarray: """ Matricized tensor times Khatri-Rao product for :class:`pyttb.ktensor`. @@ -1107,11 +1113,7 @@ def mttkrp(self, U: List[np.ndarray], n: int) -> np.ndarray: [[24. 24.] [24. 24.]] """ - if not isinstance(U, list): - assert False, "Second argument must be list of numpy.ndarray's" - - if len(U) != self.ndims: - assert False, "List of factor matrices is the wrong length" + U = get_mttkrp_factors(U, n, self.ndims) # Number of columns in input matrices if n == 0: @@ -2142,7 +2144,8 @@ def __add__(self, other): ------- :class:`pyttb.ktensor` """ - # TODO include test of other as sumtensor and call sumtensor.__add__ + if isinstance(other, ttb.sumtensor): + return other.__add__(self) if not isinstance(other, ktensor): assert False, "Cannot add instance of this type to a ktensor" diff --git a/pyttb/pyttb_utils.py b/pyttb/pyttb_utils.py index e7414642..06ea875f 100644 --- a/pyttb/pyttb_utils.py +++ b/pyttb/pyttb_utils.py @@ -6,7 +6,7 @@ from enum import Enum from inspect import signature -from typing import Optional, Tuple, Union, get_args, overload +from typing import List, Optional, Tuple, Union, get_args, overload import numpy as np @@ -856,3 +856,27 @@ def get_index_variant(indices: IndexType) -> IndexVariant: if len(key.shape) == 1 or key.shape[1] == 1: variant = IndexVariant.LINEAR return variant + + +def get_mttkrp_factors( + U: Union[ttb.ktensor, List[np.ndarray]], n: int, ndims: int +) -> List[np.ndarray]: + """Apply standard checks and type conversions for mttkrp factors""" + if isinstance(U, ttb.ktensor): + U = U.copy() + # Absorb lambda into one of the factors but not the one that is skipped + if n == 0: + U.redistribute(1) + else: + U.redistribute(0) + + # Extract the factor matrices + U = U.factor_matrices + + assert isinstance( + U, (list, np.ndarray) + ), "Second argument must be list of numpy.ndarray's or a ktensor" + + assert len(U) == ndims, "List of factor matrices is the wrong length" + + return U diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py index 5ba774ee..da044af0 100644 --- a/pyttb/sptensor.py +++ b/pyttb/sptensor.py @@ -30,6 +30,7 @@ from pyttb.pyttb_utils import ( IndexVariant, get_index_variant, + get_mttkrp_factors, tt_dimscheck, tt_ind2sub, tt_intersect_rows, @@ -889,23 +890,7 @@ def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: # In the sparse case, it is most efficient to do a series of TTV operations # rather than forming the Khatri-Rao product. - N = self.ndims - - if isinstance(U, ttb.ktensor): - # Absorb lambda into one of the factors but not the one that is skipped - if n == 0: - U.redistribute(1) - else: - U.redistribute(0) - - # Extract the factor matrices - U = U.factor_matrices - - if not isinstance(U, np.ndarray) and not isinstance(U, list): - assert False, "Second argument must be ktensor or array" - - if len(U) != N: - assert False, "List is the wrong length" + U = get_mttkrp_factors(U, n, self.ndims) if n == 0: R = U[1].shape[1] @@ -916,13 +901,18 @@ def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: for r in range(R): # Set up list with appropriate vectors for ttv multiplication Z = [] - for i in range(N): + for i in range(self.ndims): if i != n: Z.append(U[i][:, r]) else: Z.append(np.array([])) # Perform ttv multiplication - V[:, r] = self.ttv(Z, exclude_dims=n).double() + ttv = self.ttv(Z, exclude_dims=n) + # TODO is is possible to hit the float condition here? + if isinstance(ttv, float): # pragma: no cover + V[:, r] = ttv + else: + V[:, r] = ttv.double() return V @@ -1226,7 +1216,7 @@ def ttv( # noqa: PLR0912 vector: Union[np.ndarray, List[np.ndarray]], dims: Optional[Union[int, np.ndarray]] = None, exclude_dims: Optional[Union[int, np.ndarray]] = None, - ) -> Union[sptensor, ttb.tensor]: + ) -> Union[sptensor, ttb.tensor, float]: """ Sparse tensor times vector @@ -2017,7 +2007,7 @@ def __add__(self, other): :class:`pyttb.sptensor` """ # If other is sumtensor perform sumtensor add - if isinstance(other, ttb.sumtensor): # pragma: no cover + if isinstance(other, ttb.sumtensor): return other.__add__(self) # Otherwise return negated sub return self.__sub__(-other) diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index dd9d5373..4dbf8043 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -2,6 +2,16 @@ # Copyright 2022 National Technology & Engineering Solutions of Sandia, # LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # U.S. Government retains certain rights in this software. +from __future__ import annotations + +import warnings +from copy import deepcopy +from textwrap import indent +from typing import List, Optional, Tuple, Union + +import numpy as np + +import pyttb as ttb class sumtensor: @@ -10,5 +20,410 @@ class sumtensor: """ - def __init__(self): - assert False, "SUMTENSOR class not yet implemented" + def __init__( + self, + tensors: Optional[ + List[Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor]] + ] = None, + copy: bool = True, + ): + """ + Creates a :class:`pyttb.sumtensor` from a collection of tensors. + Each provided tensor is explicitly retained. All provided tensors + must have the same shape but can be combinations of types. + + Parameters + ---------- + tensors: + Tensor source data. + copy: + Whether to make a copy of provided data or just reference it. + + Examples + ------- + Create an empty :class:`pyttb.tensor`: + + >>> T1 = ttb.tenones((3,4,5)) + >>> T2 = ttb.sptensor(shape=(3,4,5)) + >>> S = ttb.sumtensor([T1, T2]) + """ + if tensors is None: + tensors = [] + assert isinstance(tensors, list), ( + "Collection of tensors must be provided as a list " + f"but received: {type(tensors)}" + ) + assert all( + tensors[0].shape == tensor_i.shape for tensor_i in tensors[1:] + ), "All tensors must be the same shape" + if copy: + tensors = deepcopy(tensors) + self.parts = tensors + + def copy(self) -> sumtensor: + """Make a deep copy of a :class:`pyttb.sumtensor`. + + Returns + ------- + Copy of original sumtensor. + + Examples + -------- + >>> T1 = ttb.tensor(np.ones((3,2))) + >>> S1 = ttb.sumtensor([T1, T1]) + >>> S2 = S1 + >>> S3 = S2.copy() + >>> S1.parts[0][0,0] = 3 + >>> S1.parts[0][0,0] == S2.parts[0][0,0] + True + >>> S1.parts[0][0,0] == S3.parts[0][0,0] + False + """ + return ttb.sumtensor(self.parts, copy=True) + + def __deepcopy__(self, memo): + return self.copy() + + @property + def shape(self) -> Tuple[int, ...]: + if len(self.parts) == 0: + return () + return self.parts[0].shape + + def __repr__(self): + """ + String representation of the sumtensor. + + Returns + ------- + String displaying shape and constituent parts. + + Examples + -------- + >>> T1 = ttb.tenones((2,2)) + >>> T2 = ttb.sptensor(shape=(2,2)) + >>> ttb.sumtensor([T1, T2]) # doctest: +NORMALIZE_WHITESPACE + sumtensor of shape (2, 2) with 2 parts: + Part 0: + tensor of shape (2, 2) + data[:, :] = + [[1. 1.] + [1. 1.]] + Part 1: + All-zero sparse tensor of shape 2 x 2 + """ + if len(self.parts) == 0: + return "Empty sumtensor" + s = f"sumtensor of shape {self.shape} with {len(self.parts)} parts:" + for i, part in enumerate(self.parts): + s += f"\nPart {i}: \n" + s += indent(str(part), prefix="\t") + return s + + __str__ = __repr__ + + @property + def ndims(self) -> int: + """ + Number of dimensions of the sumtensor. + + Examples + -------- + >>> T1 = ttb.tenones((2, 2)) + >>> S = ttb.sumtensor([T1,T1]) + >>> S.ndims + 2 + """ + return self.parts[0].ndims + + def __pos__(self): + """ + Unary plus (+) for tensors. + + Returns + ------- + Copy of sumtensor. + + Examples + -------- + >>> T = ttb.tensor(np.array([[1, 2], [3, 4]])) + >>> S = ttb.sumtensor([T, T]) + >>> S2 = +S + """ + return self.copy() + + def __neg__(self): + """ + Unary minus (-) for tensors. + + Returns + ------- + Copy of negated sumtensor. + + Examples + -------- + >>> T = ttb.tensor(np.array([[1, 2], [3, 4]])) + >>> S = ttb.sumtensor([T, T]) + >>> S2 = -S + >>> S2.parts[0].isequal(-1 * S.parts[0]) + True + """ + return ttb.sumtensor([-part for part in self.parts], copy=False) + + def __add__(self, other): + """ + Binary addition (+) for sumtensors. + + Parameters + ---------- + other: :class:`pyttb.tensor`, :class:`pyttb.sptensor` + :class:`pyttb.ktensor`, :class:`pyttb.ttensor`, or list + containing those classes + + Returns + ------- + :class:`pyttb.sumtensor` + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T]) + >>> len(S.parts) + 1 + >>> S2 = S + T + >>> len(S2.parts) + 2 + >>> S3 = S2 + [T, T] + >>> len(S3.parts) + 4 + """ + updated_parts = self.parts.copy() + if isinstance(other, (ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor)): + updated_parts.append(other) + elif isinstance(other, list) and all( + isinstance(part, (ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor)) + for part in other + ): + updated_parts.extend(other) + else: + raise TypeError( + "Sumtensor only supports collections of tensor, sptensor, ktensor, " + f"and ttensor but received: {type(other)}" + ) + return ttb.sumtensor(updated_parts, copy=False) + + def __radd__(self, other): + """ + Right Binary addition (+) for sumtensors. + + Parameters + ---------- + other: :class:`pyttb.tensor`, :class:`pyttb.sptensor` + :class:`pyttb.ktensor`, :class:`pyttb.ttensor`, or list + containing those classes + + Returns + ------- + :class:`pyttb.sumtensor` + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T]) + >>> len(S.parts) + 1 + >>> S2 = T + S + >>> len(S2.parts) + 2 + >>> S3 = [T, T] + S2 + >>> len(S3.parts) + 4 + """ + return self.__add__(other) + + def full(self) -> ttb.tensor: + """ + Convert a :class:`pyttb.sumtensor` to a :class:`pyttb.tensor`. + + Returns + ------- + Re-assembled dense tensor. + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T, T]) + >>> print(S.full()) # doctest: +NORMALIZE_WHITESPACE + tensor of shape (2, 2) + data[:, :] = + [[2. 2.] + [2. 2.]] + + """ + result = self.parts[0].full() + for part in self.parts[1:]: + result += part + return result + + def double(self) -> np.ndarray: + """ + Convert `:class:pyttb.tensor` to an `:class:numpy.ndarray` of doubles. + + Returns + ------- + Copy of tensor data. + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T, T]) + >>> S.double() + array([[2., 2.], + [2., 2.]]) + """ + return self.full().double() + + def innerprod( + self, other: Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor] + ) -> float: + """ + Efficient inner product between a sumtensor and other `pyttb` tensors + (`tensor`, `sptensor`, `ktensor`, or `ttensor`). + + Parameters + ---------- + other: + Tensor to take an innerproduct with. + + Examples + -------- + >>> T1 = ttb.tensor(np.array([[1, 0], [0, 4]])) + >>> T2 = T1.to_sptensor() + >>> S = ttb.sumtensor([T1, T2]) + >>> T1.innerprod(T1) + 17 + >>> T1.innerprod(T2) + 17 + >>> S.innerprod(T1) + 34 + """ + result = self.parts[0].innerprod(other) + for part in self.parts[1:]: + result += part.innerprod(other) + return result + + def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: + """ + Matricized tensor times Khatri-Rao product. The matrices used in the + Khatri-Rao product are passed as a :class:`pyttb.ktensor` (where the + factor matrices are used) or as a list of :class:`numpy.ndarray` objects. + + Parameters + ---------- + U: + Matrices to create the Khatri-Rao product. + n: + Mode used to matricize tensor. + + Returns + ------- + Array containing matrix product. + + Examples + -------- + >>> T1 = ttb.tenones((2,2,2)) + >>> T2 = T1.to_sptensor() + >>> S = ttb.sumtensor([T1, T2]) + >>> U = [np.ones((2,2))] * 3 + >>> T1.mttkrp(U, 2) + array([[4., 4.], + [4., 4.]]) + >>> S.mttkrp(U, 2) + array([[8., 8.], + [8., 8.]]) + """ + result = self.parts[0].mttkrp(U, n) + for part in self.parts[1:]: + result += part.mttkrp(U, n) + return result + + def ttv( + self, + vector: Union[np.ndarray, List[np.ndarray]], + dims: Optional[Union[int, np.ndarray]] = None, + exclude_dims: Optional[Union[int, np.ndarray]] = None, + ) -> Union[float, sumtensor]: + """ + Tensor times vector. + + Computes the n-mode product of `parts` with the vector `vector`; i.e., + `self x_n vector`. The integer `n` specifies the dimension (or mode) + along which the vector should be multiplied. If `vector.shape = (I,)`, + then the sumtensor must have `self.shape[n] = I`. The result will be the + same order and shape as `self` except that the size of dimension `n` + will be `J`. The resulting parts of the sum tensor have one less dimension, + as dimension `n` is removed in the multiplication. + + Multiplication with more than one vector is provided using a list of + vectors and corresponding dimensions in the tensor to use. + + The dimensions of the tensor with which to multiply can be provided as + `dims`, or the dimensions to exclude from `[0, ..., self.ndims]` can be + specified using `exclude_dims`. + + Parameters + ---------- + vector: + Vector or vectors to multiple by. + dims: + Dimensions to multiply against. + exclude_dims: + Use all dimensions but these. + + Returns + ------- + Sumtensor containing individual products or a single sum if every + product is a single value. + + Examples + -------- + >>> T = ttb.tensor(np.array([[1, 2], [3, 4]])) + >>> S = ttb.sumtensor([T, T]) + >>> T.ttv(np.ones(2),0) + tensor of shape (2,) + data[:] = + [4. 6.] + >>> S.ttv(np.ones(2),0) # doctest: +NORMALIZE_WHITESPACE + sumtensor of shape (2,) with 2 parts: + Part 0: + tensor of shape (2,) + data[:] = + [4. 6.] + Part 1: + tensor of shape (2,) + data[:] = + [4. 6.] + >>> T.ttv([np.ones(2), np.ones(2)]) + 10.0 + >>> S.ttv([np.ones(2), np.ones(2)]) + 20.0 + """ + new_parts = [] + scalar_sum = 0.0 + for part in self.parts: + result = part.ttv(vector, dims, exclude_dims) + if isinstance(result, float): + scalar_sum += result + else: + new_parts.append(result) + if len(new_parts) == 0: + return scalar_sum + assert scalar_sum == 0.0 + return ttb.sumtensor(new_parts, copy=False) + + def norm(self) -> float: + """Compatibility Interface. Just returns 0""" + warnings.warn( + "Sumtensor doesn't actually support norm. " "Returning 0 for compatibility." + ) + return 0.0 diff --git a/pyttb/tensor.py b/pyttb/tensor.py index 633b5fc0..41a0630f 100644 --- a/pyttb/tensor.py +++ b/pyttb/tensor.py @@ -20,6 +20,7 @@ from pyttb.pyttb_utils import ( IndexVariant, get_index_variant, + get_mttkrp_factors, tt_dimscheck, tt_ind2sub, tt_sub2ind, @@ -193,7 +194,7 @@ def copy(self) -> tensor: >>> T2 = T1 >>> T3 = T2.copy() >>> T1[0,0] = 3 - >>> T2[0,0] == T2[0,0] + >>> T1[0,0] == T2[0,0] True >>> T1[0,0] == T3[0,0] False @@ -738,9 +739,7 @@ def mask(self, W: tensor) -> np.ndarray: # Extract those non-zero values return self.data[tuple(wsubs.transpose())] - def mttkrp( # noqa: PLR0912 - self, U: Union[ttb.ktensor, List[np.ndarray]], n: int - ) -> np.ndarray: + def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: """ Matricized tensor times Khatri-Rao product. The matrices used in the Khatri-Rao product are passed as a :class:`pyttb.ktensor` (where the @@ -770,23 +769,7 @@ def mttkrp( # noqa: PLR0912 if self.ndims < 2: assert False, "MTTKRP is invalid for tensors with fewer than 2 dimensions" - # extract the list of factor matrices if given a ktensor - if isinstance(U, ttb.ktensor): - U = U.copy() - if n == 0: - U.redistribute(1) - else: - U.redistribute(0) - # Extract the factor matrices - U = U.factor_matrices - - # check that we have a list (or list extracted from a ktensor) - if not isinstance(U, list): - assert False, "Second argument should be a list of arrays or a ktensor" - - # check that list is the correct length - if len(U) != self.ndims: - assert False, "Second argument contains the wrong number of arrays" + U = get_mttkrp_factors(U, n, self.ndims) if n == 0: R = U[1].shape[1] @@ -1482,7 +1465,7 @@ def ttv( vector: Union[np.ndarray, List[np.ndarray]], dims: Optional[Union[int, np.ndarray]] = None, exclude_dims: Optional[Union[int, np.ndarray]] = None, - ) -> tensor: + ) -> Union[float, tensor]: """ Tensor times vector. @@ -1576,7 +1559,7 @@ def ttsv( vector: np.ndarray, skip_dim: Optional[int] = None, version: Optional[int] = None, - ) -> Union[np.ndarray, tensor]: + ) -> Union[float, np.ndarray, tensor]: """ Tensor times same vector in multiple modes. @@ -1614,8 +1597,10 @@ def ttsv( if version == 1: # Calculate the old way P = self.ndims X = np.array([vector for i in range(P)]) - if skip_dim in (0, 1): # Return scalar or matrix - return self.ttv(X, exclude_dims=exclude_dims).double() + if skip_dim in (0, 1): # Return matrix + result = self.ttv(X, exclude_dims=exclude_dims) + assert not isinstance(result, float) + return result.double() return self.ttv(X, exclude_dims=exclude_dims) if version == 2 or version is None: # Calculate the new way @@ -2159,7 +2144,7 @@ def __add__(self, other): [4 5]] """ # If rhs is sumtensor, treat as such - if isinstance(other, ttb.sumtensor): # pragma: no cover + if isinstance(other, ttb.sumtensor): return other.__add__(self) def tensor_add(x, y): @@ -2358,7 +2343,7 @@ def __pos__(self): [[1 2] [3 4]] """ - return ttb.tensor(self.data) + return self.copy() def __neg__(self): """ diff --git a/pyttb/ttensor.py b/pyttb/ttensor.py index 017301fc..93f7778f 100644 --- a/pyttb/ttensor.py +++ b/pyttb/ttensor.py @@ -251,7 +251,9 @@ def __neg__(self): """ return ttensor(-self.core, self.factor_matrices) - def innerprod(self, other: Union[ttb.tensor, ttb.sptensor, ttb.ktensor]) -> float: + def innerprod( + self, other: Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor] + ) -> float: """ Efficient inner product with a ttensor @@ -395,6 +397,7 @@ def ttv( if remdims.size == 0: assert not isinstance(newcore, (ttb.tensor, ttb.sptensor)) return float(newcore) + assert not isinstance(newcore, float) return ttensor(newcore, [self.factor_matrices[dim] for dim in remdims]) def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: diff --git a/tests/test_sptensor.py b/tests/test_sptensor.py index 131d78b8..2938ed76 100644 --- a/tests/test_sptensor.py +++ b/tests/test_sptensor.py @@ -1611,11 +1611,13 @@ def test_sptensor_mttkrp(sample_sptensor): # Wrong length input with pytest.raises(AssertionError) as excinfo: sptensorInstance.mttkrp(np.array([matrix, matrix, matrix, matrix]), 0) - assert "List is the wrong length" in str(excinfo) + assert "List of factor matrices is the wrong length" in str(excinfo) with pytest.raises(AssertionError) as excinfo: sptensorInstance.mttkrp("string", 0) - assert "Second argument must be ktensor or array" in str(excinfo) + assert "Second argument must be list of numpy.ndarray's or a ktensor" in str( + excinfo + ) def test_sptensor_nvecs(sample_sptensor): diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index f654345e..004d40da 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -1,13 +1,191 @@ # Copyright 2022 National Technology & Engineering Solutions of Sandia, # LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # U.S. Government retains certain rights in this software. +from copy import deepcopy +import numpy as np import pytest import pyttb as ttb -def test_sumtensor_initialization_empty(): - with pytest.raises(AssertionError) as excinfo: - ttb.sumtensor() - assert "SUMTENSOR class not yet implemented" in str(excinfo) +@pytest.fixture() +def example_ttensor(): + """Simple TTENSOR to verify by hand""" + core_values = np.ones((2, 2)) + core = ttb.tensor(core_values) + factors = [np.ones((2, 2))] * len(core_values.shape) + return ttb.ttensor(core, factors) + + +@pytest.fixture() +def example_kensor(): + """Simple KTENSOR to verify by hand""" + weights = np.array([1.0, 2.0]) + fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) + fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) + return ttb.ktensor([fm0, fm1], weights) + + +def test_sumtensor_initialization_init(): + # Create empty sumtensor + S = ttb.sumtensor() + assert len(S.parts) == 0 + + # Basic smoke test + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.sptensor(shape=(3, 4, 5)) + ttb.sumtensor([T1, T2]) + + # Verify copy + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S = ttb.sumtensor([T1, T2], copy=True) + S.parts[0] *= 2 + assert not S.parts[0].isequal(T1) + + # Negative Tests + ## Mismatched shapes + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 6)) + with pytest.raises(AssertionError): + ttb.sumtensor([T1, T2]) + + +def test_sumtensor_initialization_shape(): + # Basic smoke test + shape = (3, 4, 5) + T1 = ttb.tenones(shape) + T2 = ttb.sptensor(shape=shape) + S = ttb.sumtensor([T1, T2]) + assert S.shape == shape + + # Empty case + assert ttb.sumtensor().shape == () + + +def test_sumtensor_initialization_str(): + shape = (3, 4, 5) + T1 = ttb.tenones(shape) + T2 = ttb.tenones(shape) + S = ttb.sumtensor([T1, T2]) + assert str(S) == S.__repr__() + assert f"sumtensor of shape {shape}" in str(S) + + assert "Empty sumtensor" in str(ttb.sumtensor()) + + +def test_sumtensor_deepcopy(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = S1.copy() + S2.parts[0] *= 2 + assert not S1.parts[0].isequal(S2.parts[0]) + + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = deepcopy(S1) + S2.parts[0] *= 2 + assert not S1.parts[0].isequal(S2.parts[0]) + + +def test_sumtensor_pos(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = +S1 + assert S1 is not S2 + assert S1.parts == S2.parts + + +def test_sumtensor_neg(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = -S1 + assert S1 is not S2 + assert S1.parts == [-part for part in S2.parts] + + +def test_sumtensor_add_tensors(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1]) + assert len(S1.parts) == 1 + assert len((S1 + T2).parts) == 2 + assert len((T2 + S1).parts) == 2 + assert len((S1 + [T1, T2]).parts) == 3 + + +def test_sumtensor_add_sptensors(): + T1 = ttb.sptensor(shape=(3, 4, 5)) + S1 = ttb.sumtensor([T1]) + assert len(S1.parts) == 1 + assert len((S1 + T1).parts) == 2 + assert len((T1 + S1).parts) == 2 + assert len((S1 + [T1, T1]).parts) == 3 + + +def test_sumtensor_add_ktensors(example_kensor): + K = example_kensor + S1 = ttb.sumtensor([K]) + assert len(S1.parts) == 1 + assert len((S1 + K).parts) == 2 + assert len((K + S1).parts) == 2 + assert len((S1 + [K, K]).parts) == 3 + + +def test_sumtensor_add_ttensors(example_ttensor): + K = example_ttensor + S1 = ttb.sumtensor([K]) + assert len(S1.parts) == 1 + assert len((S1 + K).parts) == 2 + assert len((K + S1).parts) == 2 + assert len((S1 + [K, K]).parts) == 3 + + +def test_sumtensor_add_incorrect_type(): + T1 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1]) + non_tensor_object = 5 + with pytest.raises(TypeError): + S1 + non_tensor_object + + +def test_sumtensor_full_double(example_ttensor, example_kensor): + T1 = ttb.tenones((2, 2)) + T2 = ttb.sptensor(shape=(2, 2)) + K = example_kensor + TT = example_ttensor + S = ttb.sumtensor([T1, T2, K, TT]) + # Smoke test that all type combine + assert isinstance(S.full(), ttb.tensor) + assert isinstance(S.double(), np.ndarray) + + +def test_sumtensor_innerprod(example_ttensor, example_kensor): + T1 = ttb.tenones((2, 2)) + T2 = T1.to_sptensor() + K = example_kensor + TT = example_ttensor + S = ttb.sumtensor([T1, T2, K, TT]) + result = S.innerprod(T1) + expected = sum(part.innerprod(T1) for part in S.parts) + assert result == expected + + +def test_sumtensor_ttv(example_ttensor, example_kensor): + T1 = ttb.tenones((2, 2)) + T2 = ttb.sptensor(shape=(2, 2)) + K = example_kensor + TT = example_ttensor + S = ttb.sumtensor([T1, T2, K, TT]) + S.ttv(np.ones(2), 0) + + +def test_sumtensor_initialization_norm(): + with pytest.warns(Warning): + norm = ttb.sumtensor().norm() + assert norm == 0.0 diff --git a/tests/test_tensor.py b/tests/test_tensor.py index ab4be3eb..47d2fb7f 100644 --- a/tests/test_tensor.py +++ b/tests/test_tensor.py @@ -1624,13 +1624,15 @@ def test_tensor_mttkrp(sample_tensor_2way): # second argument not a ktensor or list with pytest.raises(AssertionError) as excinfo: tensorInstance.mttkrp(5, 0) - assert "Second argument should be a list of arrays or a ktensor" in str(excinfo) + assert "Second argument must be list of numpy.ndarray's or a ktensor" in str( + excinfo + ) # second argument list is not the correct length with pytest.raises(AssertionError) as excinfo: m0 = np.ones((2, 2)) tensorInstance.mttkrp([m0, m0, m0, m0], 0) - assert "Second argument contains the wrong number of arrays" in str(excinfo) + assert "List of factor matrices is the wrong length" in str(excinfo) # arrays not the correct shape with pytest.raises(AssertionError) as excinfo: