From 650cfe3e906d4e28ea7f2c451c00ec464245631d Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 13:29:25 -0400 Subject: [PATCH 01/10] SUMTENSOR: Add __init__, str, ndims, shape --- pyttb/sumtensor.py | 88 ++++++++++++++++++++++++++++++++++++++++- tests/test_sumtensor.py | 43 +++++++++++++++++--- 2 files changed, 123 insertions(+), 8 deletions(-) diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index dd9d5373..c0f7ac10 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -2,6 +2,13 @@ # Copyright 2022 National Technology & Engineering Solutions of Sandia, # LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # U.S. Government retains certain rights in this software. +from __future__ import annotations + +from copy import deepcopy +from textwrap import indent +from typing import List, Tuple, Union + +import pyttb as ttb class sumtensor: @@ -10,5 +17,82 @@ class sumtensor: """ - def __init__(self): - assert False, "SUMTENSOR class not yet implemented" + def __init__( + self, + tensors: List[Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor]], + copy: bool = True, + ): + """ + Creates a :class:`pyttb.sumtensor` from a collection of tensors. + Each provided tensor is explicitly retained. All provided tensors + must have the same shape but can be combinations of types. + + Parameters + ---------- + tensors: + Tensor source data. + copy: + Whether to make a copy of provided data or just reference it. + + Examples + ------- + Create an empty :class:`pyttb.tensor`: + + >>> T1 = ttb.tenones((3,4,5)) + >>> T2 = ttb.sptensor(shape=(3,4,5)) + >>> S = ttb.sumtensor([T1, T2]) + """ + assert all( + tensors[0].shape == tensor_i.shape for tensor_i in tensors[1:] + ), "All tensors must be the same shape" + if copy: + tensors = deepcopy(tensors) + self.parts = tensors + + @property + def shape(self) -> Tuple[int, ...]: + return self.parts[0].shape + + def __repr__(self): + """ + String representation of the sumtensor. + + Returns + ------- + String displaying shape and constituent parts. + + Examples + -------- + >>> T1 = ttb.tenones((2,2)) + >>> T2 = ttb.sptensor(shape=(2,2)) + >>> ttb.sumtensor([T1, T2]) # doctest: +NORMALIZE_WHITESPACE + sumtensor of shape (2, 2) with 2 parts: + Part 0: + tensor of shape (2, 2) + data[:, :] = + [[1. 1.] + [1. 1.]] + Part 1: + All-zero sparse tensor of shape 2 x 2 + """ + s = f"sumtensor of shape {self.shape} with {len(self.parts)} parts:" + for i, part in enumerate(self.parts): + s += f"\nPart {i}: \n" + s += indent(str(part), prefix="\t") + return s + + __str__ = __repr__ + + @property + def ndims(self) -> int: + """ + Number of dimensions of the sumtensor. + + Examples + -------- + >>> T1 = ttb.tenones((2, 2)) + >>> S = ttb.sumtensor([T1,T1]) + >>> S.ndims + 2 + """ + return self.parts[0].ndims diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index ae41fe9c..a64bf47e 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -1,14 +1,45 @@ # Copyright 2022 National Technology & Engineering Solutions of Sandia, # LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # U.S. Government retains certain rights in this software. - import pytest import pyttb as ttb -@pytest.mark.indevelopment -def test_sumtensor_initialization_empty(): - with pytest.raises(AssertionError) as excinfo: - ttb.sumtensor() - assert "SUMTENSOR class not yet implemented" in str(excinfo) +def test_sumtensor_initialization_init(): + # Basic smoke test + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.sptensor(shape=(3, 4, 5)) + ttb.sumtensor([T1, T2]) + + # Verify copy + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S = ttb.sumtensor([T1, T2], copy=True) + S.parts[0] *= 2 + assert not S.parts[0].isequal(T1) + + # Negative Tests + ## Mismatched shapes + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 6)) + with pytest.raises(AssertionError): + ttb.sumtensor([T1, T2]) + + +def test_sumtensor_initialization_shape(): + # Basic smoke test + shape = (3, 4, 5) + T1 = ttb.tenones(shape) + T2 = ttb.sptensor(shape=shape) + S = ttb.sumtensor([T1, T2]) + assert S.shape == shape + + +def test_sumtensor_initialization_str(): + shape = (3, 4, 5) + T1 = ttb.tenones(shape) + T2 = ttb.tenones(shape) + S = ttb.sumtensor([T1, T2]) + assert str(S) == S.__repr__() + assert f"sumtensor of shape {shape}" in str(S) From 4e83101ca7891796a2ed8d9b26666951c6202b45 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 13:37:55 -0400 Subject: [PATCH 02/10] SUMTENSOR: Add copy and fix typo in tensor doctest --- pyttb/sumtensor.py | 24 ++++++++++++++++++++++++ pyttb/tensor.py | 2 +- tests/test_sumtensor.py | 18 ++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index c0f7ac10..a416017e 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -49,6 +49,30 @@ def __init__( tensors = deepcopy(tensors) self.parts = tensors + def copy(self) -> sumtensor: + """Make a deep copy of a :class:`pyttb.sumtensor`. + + Returns + ------- + Copy of original sumtensor. + + Examples + -------- + >>> T1 = ttb.tensor(np.ones((3,2))) + >>> S1 = ttb.sumtensor([T1, T1]) + >>> S2 = S1 + >>> S3 = S2.copy() + >>> S1.parts[0][0,0] = 3 + >>> S1.parts[0][0,0] == S2.parts[0][0,0] + True + >>> S1.parts[0][0,0] == S3.parts[0][0,0] + False + """ + return ttb.sumtensor(self.parts, copy=True) + + def __deepcopy__(self, memo): + return self.copy() + @property def shape(self) -> Tuple[int, ...]: return self.parts[0].shape diff --git a/pyttb/tensor.py b/pyttb/tensor.py index 633b5fc0..82a46f90 100644 --- a/pyttb/tensor.py +++ b/pyttb/tensor.py @@ -193,7 +193,7 @@ def copy(self) -> tensor: >>> T2 = T1 >>> T3 = T2.copy() >>> T1[0,0] = 3 - >>> T2[0,0] == T2[0,0] + >>> T1[0,0] == T2[0,0] True >>> T1[0,0] == T3[0,0] False diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index a64bf47e..63e0fb25 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -1,6 +1,8 @@ # Copyright 2022 National Technology & Engineering Solutions of Sandia, # LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # U.S. Government retains certain rights in this software. +from copy import deepcopy + import pytest import pyttb as ttb @@ -43,3 +45,19 @@ def test_sumtensor_initialization_str(): S = ttb.sumtensor([T1, T2]) assert str(S) == S.__repr__() assert f"sumtensor of shape {shape}" in str(S) + + +def test_sumtensor_deepcopy(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = S1.copy() + S2.parts[0] *= 2 + assert not S1.parts[0].isequal(S2.parts[0]) + + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = deepcopy(S1) + S2.parts[0] *= 2 + assert not S1.parts[0].isequal(S2.parts[0]) From 95c077323050f119752ffdebd8938e693e8565b0 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 15:52:27 -0400 Subject: [PATCH 03/10] SUMTENSOR: Add pos, neg, and add. Update other class references to adding sumtensor --- pyttb/ktensor.py | 3 +- pyttb/sptensor.py | 2 +- pyttb/sumtensor.py | 105 ++++++++++++++++++++++++++++++++++++++++ pyttb/tensor.py | 4 +- tests/test_sumtensor.py | 70 +++++++++++++++++++++++++++ 5 files changed, 180 insertions(+), 4 deletions(-) diff --git a/pyttb/ktensor.py b/pyttb/ktensor.py index 847cd098..fdd28771 100644 --- a/pyttb/ktensor.py +++ b/pyttb/ktensor.py @@ -2140,7 +2140,8 @@ def __add__(self, other): ------- :class:`pyttb.ktensor` """ - # TODO include test of other as sumtensor and call sumtensor.__add__ + if isinstance(other, ttb.sumtensor): + return other.__add__(self) if not isinstance(other, ktensor): assert False, "Cannot add instance of this type to a ktensor" diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py index 5ba774ee..ca6efeaa 100644 --- a/pyttb/sptensor.py +++ b/pyttb/sptensor.py @@ -2017,7 +2017,7 @@ def __add__(self, other): :class:`pyttb.sptensor` """ # If other is sumtensor perform sumtensor add - if isinstance(other, ttb.sumtensor): # pragma: no cover + if isinstance(other, ttb.sumtensor): return other.__add__(self) # Otherwise return negated sub return self.__sub__(-other) diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index a416017e..d1039eca 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -120,3 +120,108 @@ def ndims(self) -> int: 2 """ return self.parts[0].ndims + + def __pos__(self): + """ + Unary plus (+) for tensors. + + Returns + ------- + Copy of sumtensor. + + Examples + -------- + >>> T = ttb.tensor(np.array([[1, 2], [3, 4]])) + >>> S = ttb.sumtensor([T, T]) + >>> S2 = +S + """ + return self.copy() + + def __neg__(self): + """ + Unary minus (-) for tensors. + + Returns + ------- + Copy of negated sumtensor. + + Examples + -------- + >>> T = ttb.tensor(np.array([[1, 2], [3, 4]])) + >>> S = ttb.sumtensor([T, T]) + >>> S2 = -S + >>> S2.parts[0].isequal(-1 * S.parts[0]) + True + """ + return ttb.sumtensor([-part for part in self.parts], copy=False) + + def __add__(self, other): + """ + Binary addition (+) for sumtensors. + + Parameters + ---------- + other: :class:`pyttb.tensor`, :class:`pyttb.sptensor` + :class:`pyttb.ktensor`, :class:`pyttb.ttensor`, or list + containing those classes + + Returns + ------- + :class:`pyttb.sumtensor` + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T]) + >>> len(S.parts) + 1 + >>> S2 = S + T + >>> len(S2.parts) + 2 + >>> S3 = S2 + [T, T] + >>> len(S3.parts) + 4 + """ + updated_parts = self.parts.copy() + if isinstance(other, (ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor)): + updated_parts.append(other) + elif isinstance(other, list) and all( + isinstance(part, (ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor)) + for part in other + ): + updated_parts.extend(other) + else: + raise TypeError( + "Sumtensor only supports collections of tensor, sptensor, ktensor, " + f"and ttensor but received: {type(other)}" + ) + return ttb.sumtensor(updated_parts, copy=False) + + def __radd__(self, other): + """ + Right Binary addition (+) for sumtensors. + + Parameters + ---------- + other: :class:`pyttb.tensor`, :class:`pyttb.sptensor` + :class:`pyttb.ktensor`, :class:`pyttb.ttensor`, or list + containing those classes + + Returns + ------- + :class:`pyttb.sumtensor` + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T]) + >>> len(S.parts) + 1 + >>> S2 = T + S + >>> len(S2.parts) + 2 + >>> S3 = [T, T] + S2 + >>> len(S3.parts) + 4 + """ + return self.__add__(other) diff --git a/pyttb/tensor.py b/pyttb/tensor.py index 82a46f90..740002df 100644 --- a/pyttb/tensor.py +++ b/pyttb/tensor.py @@ -2159,7 +2159,7 @@ def __add__(self, other): [4 5]] """ # If rhs is sumtensor, treat as such - if isinstance(other, ttb.sumtensor): # pragma: no cover + if isinstance(other, ttb.sumtensor): return other.__add__(self) def tensor_add(x, y): @@ -2358,7 +2358,7 @@ def __pos__(self): [[1 2] [3 4]] """ - return ttb.tensor(self.data) + return self.copy() def __neg__(self): """ diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index 63e0fb25..e9d5d8ab 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -3,6 +3,7 @@ # U.S. Government retains certain rights in this software. from copy import deepcopy +import numpy as np import pytest import pyttb as ttb @@ -61,3 +62,72 @@ def test_sumtensor_deepcopy(): S2 = deepcopy(S1) S2.parts[0] *= 2 assert not S1.parts[0].isequal(S2.parts[0]) + + +def test_sumtensor_pos(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = +S1 + assert S1 is not S2 + assert S1.parts == S2.parts + + +def test_sumtensor_neg(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1, T2]) + S2 = -S1 + assert S1 is not S2 + assert S1.parts == [-part for part in S2.parts] + + +def test_sumtensor_add_tensors(): + T1 = ttb.tenones((3, 4, 5)) + T2 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1]) + assert len(S1.parts) == 1 + assert len((S1 + T2).parts) == 2 + assert len((T2 + S1).parts) == 2 + assert len((S1 + [T1, T2]).parts) == 3 + + +def test_sumtensor_add_sptensors(): + T1 = ttb.sptensor(shape=(3, 4, 5)) + S1 = ttb.sumtensor([T1]) + assert len(S1.parts) == 1 + assert len((S1 + T1).parts) == 2 + assert len((T1 + S1).parts) == 2 + assert len((S1 + [T1, T1]).parts) == 3 + + +def test_sumtensor_add_ktensors(): + weights = np.array([1.0, 2.0]) + fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) + fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) + K = ttb.ktensor([fm0, fm1], weights) + S1 = ttb.sumtensor([K]) + assert len(S1.parts) == 1 + assert len((S1 + K).parts) == 2 + assert len((K + S1).parts) == 2 + assert len((S1 + [K, K]).parts) == 3 + + +def test_sumtensor_add_ttensors(): + core_values = np.ones((2, 2, 2)) + core = ttb.tensor(core_values) + factors = [np.ones((1, 2))] * len(core_values.shape) + K = ttb.ttensor(core, factors) + S1 = ttb.sumtensor([K]) + assert len(S1.parts) == 1 + assert len((S1 + K).parts) == 2 + assert len((K + S1).parts) == 2 + assert len((S1 + [K, K]).parts) == 3 + + +def test_sumtensor_add_incorrect_type(): + T1 = ttb.tenones((3, 4, 5)) + S1 = ttb.sumtensor([T1]) + non_tensor_object = 5 + with pytest.raises(TypeError): + S1 + non_tensor_object From 5bd273d5e7bf42b56bc6c6b7aa5961c0a1f17d6f Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 16:05:46 -0400 Subject: [PATCH 04/10] SUMTENSOR: Add full and double --- pyttb/sumtensor.py | 44 +++++++++++++++++++++++++++++++++++++++++ tests/test_sumtensor.py | 18 +++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index d1039eca..0eb31aea 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -8,6 +8,8 @@ from textwrap import indent from typing import List, Tuple, Union +import numpy as np + import pyttb as ttb @@ -225,3 +227,45 @@ def __radd__(self, other): 4 """ return self.__add__(other) + + def full(self) -> ttb.tensor: + """ + Convert a :class:`pyttb.sumtensor` to a :class:`pyttb.tensor`. + + Returns + ------- + Re-assembled dense tensor. + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T, T]) + >>> print(S.full()) # doctest: +NORMALIZE_WHITESPACE + tensor of shape (2, 2) + data[:, :] = + [[2. 2.] + [2. 2.]] + + """ + result = self.parts[0].full() + for part in self.parts[1:]: + result += part + return result + + def double(self) -> np.ndarray: + """ + Convert `:class:pyttb.tensor` to an `:class:numpy.ndarray` of doubles. + + Returns + ------- + Copy of tensor data. + + Examples + -------- + >>> T = ttb.tenones((2,2)) + >>> S = ttb.sumtensor([T, T]) + >>> S.double() + array([[2., 2.], + [2., 2.]]) + """ + return self.full().double() diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index e9d5d8ab..a25779cb 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -131,3 +131,21 @@ def test_sumtensor_add_incorrect_type(): non_tensor_object = 5 with pytest.raises(TypeError): S1 + non_tensor_object + + +def test_sumtensor_full_double(): + T1 = ttb.tenones((2, 2)) + T2 = ttb.sptensor(shape=(2, 2)) + weights = np.array([1.0, 2.0]) + fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) + fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) + K = ttb.ktensor([fm0, fm1], weights) + # TODO there are probably already fixtures for these values + core_values = np.ones((2, 2)) + core = ttb.tensor(core_values) + factors = [np.ones((2, 2))] * len(core_values.shape) + TT = ttb.ttensor(core, factors) + S = ttb.sumtensor([T1, T2, K, TT]) + # Smoke test that all type combine + assert isinstance(S.full(), ttb.tensor) + assert isinstance(S.double(), np.ndarray) From 79aaee2054d19d883cd45f461b1f67b9f5ac66b9 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 17:36:18 -0400 Subject: [PATCH 05/10] SUMTENSOR: Add innerprod and mttkrp --- pyttb/ktensor.py | 13 ++++++++- pyttb/sptensor.py | 1 + pyttb/sumtensor.py | 64 +++++++++++++++++++++++++++++++++++++++++ pyttb/ttensor.py | 4 ++- tests/test_sumtensor.py | 18 ++++++++++++ 5 files changed, 98 insertions(+), 2 deletions(-) diff --git a/pyttb/ktensor.py b/pyttb/ktensor.py index fdd28771..6e16de83 100644 --- a/pyttb/ktensor.py +++ b/pyttb/ktensor.py @@ -1080,7 +1080,7 @@ def mask(self, W: Union[ttb.tensor, ttb.sptensor]) -> np.ndarray: vals = vals + tmpvals return vals - def mttkrp(self, U: List[np.ndarray], n: int) -> np.ndarray: + def mttkrp(self, U: Union[ktensor, List[np.ndarray]], n: int) -> np.ndarray: """ Matricized tensor times Khatri-Rao product for :class:`pyttb.ktensor`. @@ -1107,6 +1107,17 @@ def mttkrp(self, U: List[np.ndarray], n: int) -> np.ndarray: [[24. 24.] [24. 24.]] """ + if isinstance(U, ttb.ktensor): + U = U.copy() + # Absorb lambda into one of the factors but not the one that is skipped + if n == 0: + U.redistribute(1) + else: + U.redistribute(0) + + # Extract the factor matrices + U = U.factor_matrices + if not isinstance(U, list): assert False, "Second argument must be list of numpy.ndarray's" diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py index ca6efeaa..318f7978 100644 --- a/pyttb/sptensor.py +++ b/pyttb/sptensor.py @@ -892,6 +892,7 @@ def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: N = self.ndims if isinstance(U, ttb.ktensor): + U = U.copy() # Absorb lambda into one of the factors but not the one that is skipped if n == 0: U.redistribute(1) diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index 0eb31aea..57131c00 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -269,3 +269,67 @@ def double(self) -> np.ndarray: [2., 2.]]) """ return self.full().double() + + def innerprod( + self, other: Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor] + ) -> float: + """ + Efficient inner product between a sumtensor and other `pyttb` tensors + (`tensor`, `sptensor`, `ktensor`, or `ttensor`). + + Parameters + ---------- + other: + Tensor to take an innerproduct with. + + Examples + -------- + >>> T1 = ttb.tensor(np.array([[1, 0], [0, 4]])) + >>> T2 = T1.to_sptensor() + >>> S = ttb.sumtensor([T1, T2]) + >>> T1.innerprod(T1) + 17 + >>> T1.innerprod(T2) + 17 + >>> S.innerprod(T1) + 34 + """ + result = self.parts[0].innerprod(other) + for part in self.parts[1:]: + result += part.innerprod(other) + return result + + def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: + """ + Matricized tensor times Khatri-Rao product. The matrices used in the + Khatri-Rao product are passed as a :class:`pyttb.ktensor` (where the + factor matrices are used) or as a list of :class:`numpy.ndarray` objects. + + Parameters + ---------- + U: + Matrices to create the Khatri-Rao product. + n: + Mode used to matricize tensor. + + Returns + ------- + Array containing matrix product. + + Examples + -------- + >>> T1 = ttb.tenones((2,2,2)) + >>> T2 = T1.to_sptensor() + >>> S = ttb.sumtensor([T1, T2]) + >>> U = [np.ones((2,2))] * 3 + >>> T1.mttkrp(U, 2) + array([[4., 4.], + [4., 4.]]) + >>> S.mttkrp(U, 2) + array([[8., 8.], + [8., 8.]]) + """ + result = self.parts[0].mttkrp(U, n) + for part in self.parts[1:]: + result += part.mttkrp(U, n) + return result diff --git a/pyttb/ttensor.py b/pyttb/ttensor.py index 017301fc..62bf8617 100644 --- a/pyttb/ttensor.py +++ b/pyttb/ttensor.py @@ -251,7 +251,9 @@ def __neg__(self): """ return ttensor(-self.core, self.factor_matrices) - def innerprod(self, other: Union[ttb.tensor, ttb.sptensor, ttb.ktensor]) -> float: + def innerprod( + self, other: Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor] + ) -> float: """ Efficient inner product with a ttensor diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index a25779cb..3bcc70b1 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -149,3 +149,21 @@ def test_sumtensor_full_double(): # Smoke test that all type combine assert isinstance(S.full(), ttb.tensor) assert isinstance(S.double(), np.ndarray) + + +def test_sumtensor_innerprod(): + T1 = ttb.tenones((2, 2)) + T2 = T1.to_sptensor() + weights = np.array([1.0, 2.0]) + fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) + fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) + K = ttb.ktensor([fm0, fm1], weights) + # TODO there are probably already fixtures for these values + core_values = np.ones((2, 2)) + core = ttb.tensor(core_values) + factors = [np.ones((2, 2))] * len(core_values.shape) + TT = ttb.ttensor(core, factors) + S = ttb.sumtensor([T1, T2, K, TT]) + result = S.innerprod(T1) + expected = sum(part.innerprod(T1) for part in S.parts) + assert result == expected From ba54af719a6dc36922da28180cb331e2e2d5d67d Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 17:49:59 -0400 Subject: [PATCH 06/10] Cleanup common mttkrp code for tensor, sptensor, and ktensor --- pyttb/ktensor.py | 25 ++++++++----------------- pyttb/pyttb_utils.py | 26 +++++++++++++++++++++++++- pyttb/sptensor.py | 22 +++------------------- pyttb/tensor.py | 23 +++-------------------- tests/test_sptensor.py | 6 ++++-- tests/test_tensor.py | 6 ++++-- 6 files changed, 47 insertions(+), 61 deletions(-) diff --git a/pyttb/ktensor.py b/pyttb/ktensor.py index 6e16de83..f9f93912 100644 --- a/pyttb/ktensor.py +++ b/pyttb/ktensor.py @@ -24,7 +24,13 @@ from typing_extensions import Self import pyttb as ttb -from pyttb.pyttb_utils import isrow, isvector, tt_dimscheck, tt_ind2sub +from pyttb.pyttb_utils import ( + get_mttkrp_factors, + isrow, + isvector, + tt_dimscheck, + tt_ind2sub, +) class ktensor: @@ -1107,22 +1113,7 @@ def mttkrp(self, U: Union[ktensor, List[np.ndarray]], n: int) -> np.ndarray: [[24. 24.] [24. 24.]] """ - if isinstance(U, ttb.ktensor): - U = U.copy() - # Absorb lambda into one of the factors but not the one that is skipped - if n == 0: - U.redistribute(1) - else: - U.redistribute(0) - - # Extract the factor matrices - U = U.factor_matrices - - if not isinstance(U, list): - assert False, "Second argument must be list of numpy.ndarray's" - - if len(U) != self.ndims: - assert False, "List of factor matrices is the wrong length" + U = get_mttkrp_factors(U, n, self.ndims) # Number of columns in input matrices if n == 0: diff --git a/pyttb/pyttb_utils.py b/pyttb/pyttb_utils.py index e7414642..06ea875f 100644 --- a/pyttb/pyttb_utils.py +++ b/pyttb/pyttb_utils.py @@ -6,7 +6,7 @@ from enum import Enum from inspect import signature -from typing import Optional, Tuple, Union, get_args, overload +from typing import List, Optional, Tuple, Union, get_args, overload import numpy as np @@ -856,3 +856,27 @@ def get_index_variant(indices: IndexType) -> IndexVariant: if len(key.shape) == 1 or key.shape[1] == 1: variant = IndexVariant.LINEAR return variant + + +def get_mttkrp_factors( + U: Union[ttb.ktensor, List[np.ndarray]], n: int, ndims: int +) -> List[np.ndarray]: + """Apply standard checks and type conversions for mttkrp factors""" + if isinstance(U, ttb.ktensor): + U = U.copy() + # Absorb lambda into one of the factors but not the one that is skipped + if n == 0: + U.redistribute(1) + else: + U.redistribute(0) + + # Extract the factor matrices + U = U.factor_matrices + + assert isinstance( + U, (list, np.ndarray) + ), "Second argument must be list of numpy.ndarray's or a ktensor" + + assert len(U) == ndims, "List of factor matrices is the wrong length" + + return U diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py index 318f7978..6ae0aa9a 100644 --- a/pyttb/sptensor.py +++ b/pyttb/sptensor.py @@ -30,6 +30,7 @@ from pyttb.pyttb_utils import ( IndexVariant, get_index_variant, + get_mttkrp_factors, tt_dimscheck, tt_ind2sub, tt_intersect_rows, @@ -889,24 +890,7 @@ def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: # In the sparse case, it is most efficient to do a series of TTV operations # rather than forming the Khatri-Rao product. - N = self.ndims - - if isinstance(U, ttb.ktensor): - U = U.copy() - # Absorb lambda into one of the factors but not the one that is skipped - if n == 0: - U.redistribute(1) - else: - U.redistribute(0) - - # Extract the factor matrices - U = U.factor_matrices - - if not isinstance(U, np.ndarray) and not isinstance(U, list): - assert False, "Second argument must be ktensor or array" - - if len(U) != N: - assert False, "List is the wrong length" + U = get_mttkrp_factors(U, n, self.ndims) if n == 0: R = U[1].shape[1] @@ -917,7 +901,7 @@ def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: for r in range(R): # Set up list with appropriate vectors for ttv multiplication Z = [] - for i in range(N): + for i in range(self.ndims): if i != n: Z.append(U[i][:, r]) else: diff --git a/pyttb/tensor.py b/pyttb/tensor.py index 740002df..a15cf4f2 100644 --- a/pyttb/tensor.py +++ b/pyttb/tensor.py @@ -20,6 +20,7 @@ from pyttb.pyttb_utils import ( IndexVariant, get_index_variant, + get_mttkrp_factors, tt_dimscheck, tt_ind2sub, tt_sub2ind, @@ -738,9 +739,7 @@ def mask(self, W: tensor) -> np.ndarray: # Extract those non-zero values return self.data[tuple(wsubs.transpose())] - def mttkrp( # noqa: PLR0912 - self, U: Union[ttb.ktensor, List[np.ndarray]], n: int - ) -> np.ndarray: + def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: """ Matricized tensor times Khatri-Rao product. The matrices used in the Khatri-Rao product are passed as a :class:`pyttb.ktensor` (where the @@ -770,23 +769,7 @@ def mttkrp( # noqa: PLR0912 if self.ndims < 2: assert False, "MTTKRP is invalid for tensors with fewer than 2 dimensions" - # extract the list of factor matrices if given a ktensor - if isinstance(U, ttb.ktensor): - U = U.copy() - if n == 0: - U.redistribute(1) - else: - U.redistribute(0) - # Extract the factor matrices - U = U.factor_matrices - - # check that we have a list (or list extracted from a ktensor) - if not isinstance(U, list): - assert False, "Second argument should be a list of arrays or a ktensor" - - # check that list is the correct length - if len(U) != self.ndims: - assert False, "Second argument contains the wrong number of arrays" + U = get_mttkrp_factors(U, n, self.ndims) if n == 0: R = U[1].shape[1] diff --git a/tests/test_sptensor.py b/tests/test_sptensor.py index 131d78b8..2938ed76 100644 --- a/tests/test_sptensor.py +++ b/tests/test_sptensor.py @@ -1611,11 +1611,13 @@ def test_sptensor_mttkrp(sample_sptensor): # Wrong length input with pytest.raises(AssertionError) as excinfo: sptensorInstance.mttkrp(np.array([matrix, matrix, matrix, matrix]), 0) - assert "List is the wrong length" in str(excinfo) + assert "List of factor matrices is the wrong length" in str(excinfo) with pytest.raises(AssertionError) as excinfo: sptensorInstance.mttkrp("string", 0) - assert "Second argument must be ktensor or array" in str(excinfo) + assert "Second argument must be list of numpy.ndarray's or a ktensor" in str( + excinfo + ) def test_sptensor_nvecs(sample_sptensor): diff --git a/tests/test_tensor.py b/tests/test_tensor.py index ab4be3eb..47d2fb7f 100644 --- a/tests/test_tensor.py +++ b/tests/test_tensor.py @@ -1624,13 +1624,15 @@ def test_tensor_mttkrp(sample_tensor_2way): # second argument not a ktensor or list with pytest.raises(AssertionError) as excinfo: tensorInstance.mttkrp(5, 0) - assert "Second argument should be a list of arrays or a ktensor" in str(excinfo) + assert "Second argument must be list of numpy.ndarray's or a ktensor" in str( + excinfo + ) # second argument list is not the correct length with pytest.raises(AssertionError) as excinfo: m0 = np.ones((2, 2)) tensorInstance.mttkrp([m0, m0, m0, m0], 0) - assert "Second argument contains the wrong number of arrays" in str(excinfo) + assert "List of factor matrices is the wrong length" in str(excinfo) # arrays not the correct shape with pytest.raises(AssertionError) as excinfo: From 356c6dceac3bb792b7069d66f0c7f7fe07a5fed4 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 18:46:48 -0400 Subject: [PATCH 07/10] SUMTENSOR: TTV * Update other tensor TTV return types to be consistent --- pyttb/sptensor.py | 9 +++-- pyttb/sumtensor.py | 76 ++++++++++++++++++++++++++++++++++++++++- pyttb/tensor.py | 10 +++--- pyttb/ttensor.py | 1 + tests/test_sumtensor.py | 67 ++++++++++++++++++++---------------- 5 files changed, 126 insertions(+), 37 deletions(-) diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py index 6ae0aa9a..da044af0 100644 --- a/pyttb/sptensor.py +++ b/pyttb/sptensor.py @@ -907,7 +907,12 @@ def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: else: Z.append(np.array([])) # Perform ttv multiplication - V[:, r] = self.ttv(Z, exclude_dims=n).double() + ttv = self.ttv(Z, exclude_dims=n) + # TODO is is possible to hit the float condition here? + if isinstance(ttv, float): # pragma: no cover + V[:, r] = ttv + else: + V[:, r] = ttv.double() return V @@ -1211,7 +1216,7 @@ def ttv( # noqa: PLR0912 vector: Union[np.ndarray, List[np.ndarray]], dims: Optional[Union[int, np.ndarray]] = None, exclude_dims: Optional[Union[int, np.ndarray]] = None, - ) -> Union[sptensor, ttb.tensor]: + ) -> Union[sptensor, ttb.tensor, float]: """ Sparse tensor times vector diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index 57131c00..1e1dd690 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -6,7 +6,7 @@ from copy import deepcopy from textwrap import indent -from typing import List, Tuple, Union +from typing import List, Optional, Tuple, Union import numpy as np @@ -333,3 +333,77 @@ def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: for part in self.parts[1:]: result += part.mttkrp(U, n) return result + + def ttv( + self, + vector: Union[np.ndarray, List[np.ndarray]], + dims: Optional[Union[int, np.ndarray]] = None, + exclude_dims: Optional[Union[int, np.ndarray]] = None, + ) -> Union[float, sumtensor]: + """ + Tensor times vector. + + Computes the n-mode product of `parts` with the vector `vector`; i.e., + `self x_n vector`. The integer `n` specifies the dimension (or mode) + along which the vector should be multiplied. If `vector.shape = (I,)`, + then the sumtensor must have `self.shape[n] = I`. The result will be the + same order and shape as `self` except that the size of dimension `n` + will be `J`. The resulting parts of the sum tensor have one less dimension, + as dimension `n` is removed in the multiplication. + + Multiplication with more than one vector is provided using a list of + vectors and corresponding dimensions in the tensor to use. + + The dimensions of the tensor with which to multiply can be provided as + `dims`, or the dimensions to exclude from `[0, ..., self.ndims]` can be + specified using `exclude_dims`. + + Parameters + ---------- + vector: + Vector or vectors to multiple by. + dims: + Dimensions to multiply against. + exclude_dims: + Use all dimensions but these. + + Returns + ------- + Sumtensor containing individual products or a single sum if every + product is a single value. + + Examples + -------- + >>> T = ttb.tensor(np.array([[1, 2], [3, 4]])) + >>> S = ttb.sumtensor([T, T]) + >>> T.ttv(np.ones(2),0) + tensor of shape (2,) + data[:] = + [4. 6.] + >>> S.ttv(np.ones(2),0) # doctest: +NORMALIZE_WHITESPACE + sumtensor of shape (2,) with 2 parts: + Part 0: + tensor of shape (2,) + data[:] = + [4. 6.] + Part 1: + tensor of shape (2,) + data[:] = + [4. 6.] + >>> T.ttv([np.ones(2), np.ones(2)]) + 10.0 + >>> S.ttv([np.ones(2), np.ones(2)]) + 20.0 + """ + new_parts = [] + scalar_sum = 0.0 + for part in self.parts: + result = part.ttv(vector, dims, exclude_dims) + if isinstance(result, float): + scalar_sum += result + else: + new_parts.append(result) + if len(new_parts) == 0: + return scalar_sum + assert scalar_sum == 0.0 + return ttb.sumtensor(new_parts, copy=False) diff --git a/pyttb/tensor.py b/pyttb/tensor.py index a15cf4f2..41a0630f 100644 --- a/pyttb/tensor.py +++ b/pyttb/tensor.py @@ -1465,7 +1465,7 @@ def ttv( vector: Union[np.ndarray, List[np.ndarray]], dims: Optional[Union[int, np.ndarray]] = None, exclude_dims: Optional[Union[int, np.ndarray]] = None, - ) -> tensor: + ) -> Union[float, tensor]: """ Tensor times vector. @@ -1559,7 +1559,7 @@ def ttsv( vector: np.ndarray, skip_dim: Optional[int] = None, version: Optional[int] = None, - ) -> Union[np.ndarray, tensor]: + ) -> Union[float, np.ndarray, tensor]: """ Tensor times same vector in multiple modes. @@ -1597,8 +1597,10 @@ def ttsv( if version == 1: # Calculate the old way P = self.ndims X = np.array([vector for i in range(P)]) - if skip_dim in (0, 1): # Return scalar or matrix - return self.ttv(X, exclude_dims=exclude_dims).double() + if skip_dim in (0, 1): # Return matrix + result = self.ttv(X, exclude_dims=exclude_dims) + assert not isinstance(result, float) + return result.double() return self.ttv(X, exclude_dims=exclude_dims) if version == 2 or version is None: # Calculate the new way diff --git a/pyttb/ttensor.py b/pyttb/ttensor.py index 62bf8617..93f7778f 100644 --- a/pyttb/ttensor.py +++ b/pyttb/ttensor.py @@ -397,6 +397,7 @@ def ttv( if remdims.size == 0: assert not isinstance(newcore, (ttb.tensor, ttb.sptensor)) return float(newcore) + assert not isinstance(newcore, float) return ttensor(newcore, [self.factor_matrices[dim] for dim in remdims]) def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray: diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index 3bcc70b1..d61167ac 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -9,6 +9,24 @@ import pyttb as ttb +@pytest.fixture() +def example_ttensor(): + """Simple TTENSOR to verify by hand""" + core_values = np.ones((2, 2)) + core = ttb.tensor(core_values) + factors = [np.ones((2, 2))] * len(core_values.shape) + return ttb.ttensor(core, factors) + + +@pytest.fixture() +def example_kensor(): + """Simple KTENSOR to verify by hand""" + weights = np.array([1.0, 2.0]) + fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) + fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) + return ttb.ktensor([fm0, fm1], weights) + + def test_sumtensor_initialization_init(): # Basic smoke test T1 = ttb.tenones((3, 4, 5)) @@ -101,11 +119,8 @@ def test_sumtensor_add_sptensors(): assert len((S1 + [T1, T1]).parts) == 3 -def test_sumtensor_add_ktensors(): - weights = np.array([1.0, 2.0]) - fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) - fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) - K = ttb.ktensor([fm0, fm1], weights) +def test_sumtensor_add_ktensors(example_kensor): + K = example_kensor S1 = ttb.sumtensor([K]) assert len(S1.parts) == 1 assert len((S1 + K).parts) == 2 @@ -113,11 +128,8 @@ def test_sumtensor_add_ktensors(): assert len((S1 + [K, K]).parts) == 3 -def test_sumtensor_add_ttensors(): - core_values = np.ones((2, 2, 2)) - core = ttb.tensor(core_values) - factors = [np.ones((1, 2))] * len(core_values.shape) - K = ttb.ttensor(core, factors) +def test_sumtensor_add_ttensors(example_ttensor): + K = example_ttensor S1 = ttb.sumtensor([K]) assert len(S1.parts) == 1 assert len((S1 + K).parts) == 2 @@ -133,37 +145,32 @@ def test_sumtensor_add_incorrect_type(): S1 + non_tensor_object -def test_sumtensor_full_double(): +def test_sumtensor_full_double(example_ttensor, example_kensor): T1 = ttb.tenones((2, 2)) T2 = ttb.sptensor(shape=(2, 2)) - weights = np.array([1.0, 2.0]) - fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) - fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) - K = ttb.ktensor([fm0, fm1], weights) - # TODO there are probably already fixtures for these values - core_values = np.ones((2, 2)) - core = ttb.tensor(core_values) - factors = [np.ones((2, 2))] * len(core_values.shape) - TT = ttb.ttensor(core, factors) + K = example_kensor + TT = example_ttensor S = ttb.sumtensor([T1, T2, K, TT]) # Smoke test that all type combine assert isinstance(S.full(), ttb.tensor) assert isinstance(S.double(), np.ndarray) -def test_sumtensor_innerprod(): +def test_sumtensor_innerprod(example_ttensor, example_kensor): T1 = ttb.tenones((2, 2)) T2 = T1.to_sptensor() - weights = np.array([1.0, 2.0]) - fm0 = np.array([[1.0, 2.0], [3.0, 4.0]]) - fm1 = np.array([[5.0, 6.0], [7.0, 8.0]]) - K = ttb.ktensor([fm0, fm1], weights) - # TODO there are probably already fixtures for these values - core_values = np.ones((2, 2)) - core = ttb.tensor(core_values) - factors = [np.ones((2, 2))] * len(core_values.shape) - TT = ttb.ttensor(core, factors) + K = example_kensor + TT = example_ttensor S = ttb.sumtensor([T1, T2, K, TT]) result = S.innerprod(T1) expected = sum(part.innerprod(T1) for part in S.parts) assert result == expected + + +def test_sumtensor_ttv(example_ttensor, example_kensor): + T1 = ttb.tenones((2, 2)) + T2 = ttb.sptensor(shape=(2, 2)) + K = example_kensor + TT = example_ttensor + S = ttb.sumtensor([T1, T2, K, TT]) + S.ttv(np.ones(2), 0) From 144b4142126b394d1664fd4fbb01b7a5994056e5 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 19:06:26 -0400 Subject: [PATCH 08/10] SUMTENSOR: Add autodoc --- docs/source/sumtensor.rst | 8 +++++++- docs/source/tensor_classes.rst | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/sumtensor.rst b/docs/source/sumtensor.rst index 4851133c..fcaf8d00 100644 --- a/docs/source/sumtensor.rst +++ b/docs/source/sumtensor.rst @@ -1,2 +1,8 @@ pyttb.sumtensor -======================= \ No newline at end of file +----------------------- + +.. autoclass:: pyttb.sumtensor + :members: + :special-members: + :exclude-members: __dict__, __weakref__, __slots__, __init__ + :show-inheritance: \ No newline at end of file diff --git a/docs/source/tensor_classes.rst b/docs/source/tensor_classes.rst index 70d93643..f9c17802 100644 --- a/docs/source/tensor_classes.rst +++ b/docs/source/tensor_classes.rst @@ -6,6 +6,7 @@ Tensor Classes ktensor.rst sptensor.rst + sumtensor.rst tensor.rst ttensor.rst tenmat.rst From 56b5cbf0666f219818cf0c847b7dfab0f651753a Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 20:22:51 -0400 Subject: [PATCH 09/10] SUMTENSOR: Add tutorial and supported fixes --- docs/source/tutorial/class_sumtensor.ipynb | 345 +++++++++++++++++++++ pyttb/cp_als.py | 5 +- pyttb/sumtensor.py | 22 +- tests/test_sumtensor.py | 15 + 4 files changed, 385 insertions(+), 2 deletions(-) create mode 100644 docs/source/tutorial/class_sumtensor.ipynb diff --git a/docs/source/tutorial/class_sumtensor.ipynb b/docs/source/tutorial/class_sumtensor.ipynb new file mode 100644 index 00000000..1338fee9 --- /dev/null +++ b/docs/source/tutorial/class_sumtensor.ipynb @@ -0,0 +1,345 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a2683b4e", + "metadata": {}, + "source": [ + "# Sum of Structured Tensors\n", + "```\n", + "Copyright 2022 National Technology & Engineering Solutions of Sandia,\n", + "LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the\n", + "U.S. Government retains certain rights in this software.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "6c96f0b4", + "metadata": {}, + "source": [ + "When certain operations are performed on a tensor which is formed as a sum of tensors, it can be beneficial to avoid explicitly forming the sum. For example, if a tensor is formed as a sum of a low rank tensor and a sparse tensor, the structure of the summands can make storage, decomposition and operations with other tensors significantly more efficient. A `sumtensor` exploits this structure. Here we explain the basics of defining and using sumtensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c2ad211", + "metadata": {}, + "outputs": [], + "source": [ + "import pyttb as ttb\n", + "import numpy as np\n", + "import pickle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a808d286", + "metadata": {}, + "outputs": [], + "source": [ + "def estimate_mem_usage(variable) -> int:\n", + " \"\"\"\n", + " Python variables contain references to memory.\n", + " Quickly estimate memory usage of custom types.\n", + " \"\"\"\n", + " return len(pickle.dumps(variable))" + ] + }, + { + "cell_type": "markdown", + "id": "d169e48b", + "metadata": {}, + "source": [ + "## Creating sumtensors\n", + "A sumtensor `T` can only be delared as a sum of same-shaped tensors T1, T2,...,TN. The summand tensors are stored internally, which define the \"parts\" of the `sumtensor`. The parts of a `sumtensor` can be (dense) tensors (`tensor`), sparse tensors (` sptensor`), Kruskal tensors (`ktensor`), or Tucker tensors (`ttensor`). An example of the use of the sumtensor constructor follows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75797e16", + "metadata": {}, + "outputs": [], + "source": [ + "T1 = ttb.tenones((3, 3, 3))\n", + "T2 = ttb.sptensor(\n", + " subs=np.array([[0, 0, 0], [1, 1, 1], [2, 2, 1], [1, 0, 0]]),\n", + " vals=np.ones((4, 1)),\n", + " shape=(3, 3, 3),\n", + ")\n", + "T = ttb.sumtensor([T1, T2])\n", + "print(T)" + ] + }, + { + "cell_type": "markdown", + "id": "0a17f6a8", + "metadata": {}, + "source": [ + "## A Magnitude Example\n", + "For large-scale problems, the `sumtensor` class may make the difference as to whether or not a tensor can be stored in memory. Consider the following example, where $\\mathcal{T}$ is of size $1000 x 1000 x 1000$, formed from the sum of a `ktensor` and an `sptensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1215e3d2", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(0)\n", + "X1 = np.random.rand(50, 3)\n", + "X2 = np.random.rand(50, 3)\n", + "X3 = np.random.rand(50, 3)\n", + "K = ttb.ktensor([X1, X2, X3], np.ones((3,)), copy=False)\n", + "S = ttb.sptenrand((50, 50, 50), 1e-100)\n", + "\n", + "ST = ttb.sumtensor([K, S])\n", + "TT = ST.full()\n", + "print(\n", + " f\"Size of sumtensor: {estimate_mem_usage(ST)}\\n\"\n", + " f\"Size of tensor: {estimate_mem_usage(TT)}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9650dccc", + "metadata": {}, + "source": [ + "## Further examples of the sumtensor constructor\n", + "We can declare an empty sumtensor, with no parts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36ea6eb5", + "metadata": {}, + "outputs": [], + "source": [ + "P = ttb.sumtensor()\n", + "print(P)" + ] + }, + { + "cell_type": "markdown", + "id": "346374a7", + "metadata": {}, + "source": [ + "`sumtensor` also supports a copy constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f309064", + "metadata": {}, + "outputs": [], + "source": [ + "S = P.copy()\n", + "print(S)" + ] + }, + { + "cell_type": "markdown", + "id": "c28b7cc6", + "metadata": {}, + "source": [ + "## Ndims and shape for dimensions of a sumtensor\n", + "For a given `sumtensor`, `ndims` returns the number of modes and `shape` returns the shape in each dimension of the `sumtensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e484181", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Ndims: {T.ndims}\\n\" f\"Shape: {T.shape}\")" + ] + }, + { + "cell_type": "markdown", + "id": "ec82fc57", + "metadata": {}, + "source": [ + "## Use full to convert sumtensor to a dense tensor\n", + "The `full` method can convert all the parts of a `sumtensor` to a dense tensor. Note that for large tensors, this can use a large amount of memory to expand then sum the parts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d247884d", + "metadata": {}, + "outputs": [], + "source": [ + "print(T.full())" + ] + }, + { + "cell_type": "markdown", + "id": "b1cfbffa", + "metadata": {}, + "source": [ + "## Use double to convert to a numpy array\n", + "The `double` method can convert the parts of a `sumtensor` to a dense numpy array. Similar warnings for memory usages as `full`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be2ba03", + "metadata": {}, + "outputs": [], + "source": [ + "print(T.double())" + ] + }, + { + "cell_type": "markdown", + "id": "bd9f0550", + "metadata": {}, + "source": [ + "## Matricized Khatri-Rao product of a sumtensor\n", + "The `mttkrp` method computes the Khatri-Rao product of a matricized tensor and `sumtensor`. The required arguments are:\n", + "* A list of matrices (or a `ktensor`)\n", + "* A mode n\n", + "The list of matrices must consist of m matrices, where m is the number of modes in the `sumtensor`. The number of columns in all matrices should be the same and the number of rows of matrix i should match the dimension of the `sumtensor` shape in mode i. For more details see the documentation of `tensor.mttkrp`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dd198fa", + "metadata": {}, + "outputs": [], + "source": [ + "matrices = [np.eye(3), np.ones((3, 3)), np.random.rand(3, 3)]\n", + "n = 1\n", + "T.mttkrp(matrices, n)" + ] + }, + { + "cell_type": "markdown", + "id": "dd35fc03", + "metadata": {}, + "source": [ + "## Innerproducts of sumtensors\n", + "The `innerprod` method computes the inner product of a `sumtensors` parts with other tensor types." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfa6d728", + "metadata": {}, + "outputs": [], + "source": [ + "S = ttb.sptensor(\n", + " subs=np.array([[0, 0, 0], [1, 1, 1], [2, 2, 1], [1, 0, 0]]),\n", + " vals=np.ones((4, 1)),\n", + " shape=(3, 3, 3),\n", + ")\n", + "T.innerprod(S)" + ] + }, + { + "cell_type": "markdown", + "id": "802902bb", + "metadata": {}, + "source": [ + "## Norm compatibility interface\n", + "The `norm` method just returns 0 and issues a warning. Norm cannot be distributed, but some algorithms access the norm for verbose details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f945dea9", + "metadata": {}, + "outputs": [], + "source": [ + "T.norm()" + ] + }, + { + "cell_type": "markdown", + "id": "4647329a", + "metadata": {}, + "source": [ + "## Use CP-ALS with sumtensor\n", + "One of the primary motivations for defining the `sumtensor` class is for efficient decomposition. In particular, when trying to find a CP decomposition of a tensor using alternating least squares, the subproblems can be efficiently created and solved using mttkrp and innerprod. Both of these operations can be performed more efficiently by exploiting extra structure in the tensors which form the sum, so the performance of `cp_als` is also improved. Consider the following example, where a cp_als is run on a sumtensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d980252", + "metadata": {}, + "outputs": [], + "source": [ + "result, _, _ = ttb.cp_als(T, 2, maxiters=10)\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "b93b33f9", + "metadata": {}, + "source": [ + "It follows that in cases where $\\mathcal{T}$ is too large for its full expansion to be stored in memory, we may still be able find a CP decomposition by exploiting the sumtensor structure.\n", + "\n", + "_Note_ that the fit returned by `cp_als` is not correct for `sumtensor`, because the norm operation is not supported." + ] + }, + { + "cell_type": "markdown", + "id": "3b3793eb", + "metadata": {}, + "source": [ + "## Addition with sumtensors\n", + "Sumtensors can be added to any other type of tensor. The result is a new `sumtensor` with the tensor appended to the parts of the original `sumtensor`. Note that the tensor is always appended, despite the order of the operation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3514aa4b", + "metadata": {}, + "outputs": [], + "source": [ + "# Equivalent additions despite the order\n", + "print(f\"T+S:\\n{T+S}\\n\")\n", + "print(f\"S+T:\\n{S+T}\")" + ] + }, + { + "cell_type": "markdown", + "id": "bd401c2d", + "metadata": {}, + "source": [ + "## Accessing sumtensor parts\n", + "Subscripted reference can be used to access individual parts of the `sumtensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47938e4f", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Part 0:\\n{T.parts[0]}\\n\\n\" f\"Part 1:\\n{T.parts[1]}\")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyttb/cp_als.py b/pyttb/cp_als.py index ca263031..c7ae2c67 100644 --- a/pyttb/cp_als.py +++ b/pyttb/cp_als.py @@ -12,7 +12,7 @@ def cp_als( # noqa: PLR0912,PLR0913,PLR0915 - input_tensor: Union[ttb.tensor, ttb.sptensor, ttb.ttensor], + input_tensor: Union[ttb.tensor, ttb.sptensor, ttb.ttensor, ttb.sumtensor], rank: int, stoptol: float = 1e-4, maxiters: int = 1000, @@ -156,6 +156,9 @@ def cp_als( # noqa: PLR0912,PLR0913,PLR0915 ) init = ttb.ktensor(factor_matrices) elif isinstance(init, str) and init.lower() == "nvecs": + assert not isinstance( + input_tensor, ttb.sumtensor + ), "Sumtensor doesn't support nvecs" factor_matrices = [] for n in range(N): factor_matrices.append(input_tensor.nvecs(n, rank)) diff --git a/pyttb/sumtensor.py b/pyttb/sumtensor.py index 1e1dd690..4dbf8043 100644 --- a/pyttb/sumtensor.py +++ b/pyttb/sumtensor.py @@ -4,6 +4,7 @@ # U.S. Government retains certain rights in this software. from __future__ import annotations +import warnings from copy import deepcopy from textwrap import indent from typing import List, Optional, Tuple, Union @@ -21,7 +22,9 @@ class sumtensor: def __init__( self, - tensors: List[Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor]], + tensors: Optional[ + List[Union[ttb.tensor, ttb.sptensor, ttb.ktensor, ttb.ttensor]] + ] = None, copy: bool = True, ): """ @@ -44,6 +47,12 @@ def __init__( >>> T2 = ttb.sptensor(shape=(3,4,5)) >>> S = ttb.sumtensor([T1, T2]) """ + if tensors is None: + tensors = [] + assert isinstance(tensors, list), ( + "Collection of tensors must be provided as a list " + f"but received: {type(tensors)}" + ) assert all( tensors[0].shape == tensor_i.shape for tensor_i in tensors[1:] ), "All tensors must be the same shape" @@ -77,6 +86,8 @@ def __deepcopy__(self, memo): @property def shape(self) -> Tuple[int, ...]: + if len(self.parts) == 0: + return () return self.parts[0].shape def __repr__(self): @@ -101,6 +112,8 @@ def __repr__(self): Part 1: All-zero sparse tensor of shape 2 x 2 """ + if len(self.parts) == 0: + return "Empty sumtensor" s = f"sumtensor of shape {self.shape} with {len(self.parts)} parts:" for i, part in enumerate(self.parts): s += f"\nPart {i}: \n" @@ -407,3 +420,10 @@ def ttv( return scalar_sum assert scalar_sum == 0.0 return ttb.sumtensor(new_parts, copy=False) + + def norm(self) -> float: + """Compatibility Interface. Just returns 0""" + warnings.warn( + "Sumtensor doesn't actually support norm. " "Returning 0 for compatibility." + ) + return 0.0 diff --git a/tests/test_sumtensor.py b/tests/test_sumtensor.py index d61167ac..004d40da 100644 --- a/tests/test_sumtensor.py +++ b/tests/test_sumtensor.py @@ -28,6 +28,10 @@ def example_kensor(): def test_sumtensor_initialization_init(): + # Create empty sumtensor + S = ttb.sumtensor() + assert len(S.parts) == 0 + # Basic smoke test T1 = ttb.tenones((3, 4, 5)) T2 = ttb.sptensor(shape=(3, 4, 5)) @@ -56,6 +60,9 @@ def test_sumtensor_initialization_shape(): S = ttb.sumtensor([T1, T2]) assert S.shape == shape + # Empty case + assert ttb.sumtensor().shape == () + def test_sumtensor_initialization_str(): shape = (3, 4, 5) @@ -65,6 +72,8 @@ def test_sumtensor_initialization_str(): assert str(S) == S.__repr__() assert f"sumtensor of shape {shape}" in str(S) + assert "Empty sumtensor" in str(ttb.sumtensor()) + def test_sumtensor_deepcopy(): T1 = ttb.tenones((3, 4, 5)) @@ -174,3 +183,9 @@ def test_sumtensor_ttv(example_ttensor, example_kensor): TT = example_ttensor S = ttb.sumtensor([T1, T2, K, TT]) S.ttv(np.ones(2), 0) + + +def test_sumtensor_initialization_norm(): + with pytest.warns(Warning): + norm = ttb.sumtensor().norm() + assert norm == 0.0 From 2e90e31460ed6f4b1469320f431fadeb9f2a77fa Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Sun, 15 Oct 2023 20:32:06 -0400 Subject: [PATCH 10/10] SUMTENSOR: Fix formatting on tutorial --- docs/source/tutorial/class_sumtensor.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/tutorial/class_sumtensor.ipynb b/docs/source/tutorial/class_sumtensor.ipynb index 1338fee9..a554999d 100644 --- a/docs/source/tutorial/class_sumtensor.ipynb +++ b/docs/source/tutorial/class_sumtensor.ipynb @@ -210,6 +210,7 @@ "The `mttkrp` method computes the Khatri-Rao product of a matricized tensor and `sumtensor`. The required arguments are:\n", "* A list of matrices (or a `ktensor`)\n", "* A mode n\n", + "\n", "The list of matrices must consist of m matrices, where m is the number of modes in the `sumtensor`. The number of columns in all matrices should be the same and the number of rows of matrix i should match the dimension of the `sumtensor` shape in mode i. For more details see the documentation of `tensor.mttkrp`." ] },