diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 08fd3f73017..60530d10280 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -113,8 +113,9 @@ def binary_operator( for lists concatenation functions reflect : boolean, default False - If ``reflect`` is ``True``, swap the order of - the operands. + If ``True``, swap the order of the operands. See + https://docs.python.org/3/reference/datamodel.html#object.__ror__ + for more information on when this is necessary. Returns ------- diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bfa604ec688..4239a55118f 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -974,6 +974,13 @@ def __array_function__(self, func, types, args, kwargs): # Handle case if cudf_func is same as numpy function if cudf_func is func: return NotImplemented + # numpy returns an array from the dot product of two dataframes + elif ( + func is np.dot + and isinstance(args[0], (DataFrame, pd.DataFrame)) + and isinstance(args[1], (DataFrame, pd.DataFrame)) + ): + return cudf_func(*args, **kwargs).values else: return cudf_func(*args, **kwargs) else: @@ -1657,8 +1664,9 @@ def add(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `radd`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -1803,8 +1811,9 @@ def radd(self, other, axis=1, level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `add`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -1856,8 +1865,9 @@ def sub(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `rsub`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -1909,8 +1919,9 @@ def rsub(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `sub`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -1967,8 +1978,9 @@ def mul(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `rmul`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2022,8 +2034,9 @@ def rmul(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `mul`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2077,8 +2090,9 @@ def mod(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `rmod`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2130,8 +2144,9 @@ def rmod(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `mod`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2183,8 +2198,9 @@ def pow(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `rpow`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2236,8 +2252,9 @@ def rpow(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `pow`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2289,8 +2306,9 @@ def floordiv(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `rfloordiv`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2342,8 +2360,9 @@ def rfloordiv(self, other, axis="columns", level=None, fill_value=None): a fill_value for missing data in one of the inputs. With reverse version, `floordiv`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2405,8 +2424,9 @@ def truediv(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `rtruediv`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- @@ -2466,8 +2486,9 @@ def rtruediv(self, other, axis="columns", level=None, fill_value=None): fill_value for missing data in one of the inputs. With reverse version, `truediv`. - Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to - arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. Parameters ---------- diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 33be14462d4..037f6f7ff94 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3237,8 +3237,9 @@ def _binaryop( The value to replace null values with. If ``None``, nulls are not filled before the operation. reflect : bool, default False - If ``True`` the operation is reflected (i.e whether to swap the - left and right operands). + If ``True``, swap the order of the operands. See + https://docs.python.org/3/reference/datamodel.html#object.__ror__ + for more information on when this is necessary. Returns ------- @@ -3406,6 +3407,66 @@ def _colwise_binop( return output + def dot(self, other, reflect=False): + """ + Get dot product of frame and other, (binary operator `dot`). + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`, + `dot`) to arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`, + `@`. + + Parameters + ---------- + other : Sequence, Series, or DataFrame + Any multiple element data structure, or list-like object. + reflect : bool, default False + If ``True``, swap the order of the operands. See + https://docs.python.org/3/reference/datamodel.html#object.__ror__ + for more information on when this is necessary. + + Returns + ------- + scalar, Series, or DataFrame + The result of the operation. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame([[1, 2, 3, 4], + ... [5, 6, 7, 8]]) + >>> df @ df.T + 0 1 + 0 30 70 + 1 70 174 + >>> s = cudf.Series([1, 1, 1, 1]) + >>> df @ s + 0 10 + 1 26 + dtype: int64 + >>> [1, 2, 3, 4] @ s + 10 + """ + lhs = self.values + if isinstance(other, Frame): + rhs = other.values + elif isinstance(other, cupy.ndarray): + rhs = other + elif isinstance( + other, (abc.Sequence, np.ndarray, pd.DataFrame, pd.Series) + ): + rhs = cupy.asarray(other) + else: + return NotImplemented + if reflect: + lhs, rhs = rhs, lhs + + result = lhs.dot(rhs) + if len(result.shape) == 1: + return cudf.Series(result) + if len(result.shape) == 2: + return cudf.DataFrame(result) + return result.item() + # Binary arithmetic operations. def __add__(self, other): return self._binaryop(other, "add") @@ -3419,6 +3480,12 @@ def __sub__(self, other): def __rsub__(self, other): return self._binaryop(other, "sub", reflect=True) + def __matmul__(self, other): + return self.dot(other) + + def __rmatmul__(self, other): + return self.dot(other, reflect=True) + def __mul__(self, other): return self._binaryop(other, "mul") @@ -4923,8 +4990,9 @@ def _make_operands_for_binop( The value to replace null values with. If ``None``, nulls are not filled before the operation. reflect : bool, default False - If ``True`` the operation is reflected (i.e whether to swap the - left and right operands). + If ``True``, swap the order of the operands. See + https://docs.python.org/3/reference/datamodel.html#object.__ror__ + for more information on when this is necessary. Returns ------- diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py index cd4dd28f179..ecd13b57ca4 100644 --- a/python/cudf/cudf/tests/test_array_function.py +++ b/python/cudf/cudf/tests/test_array_function.py @@ -43,7 +43,12 @@ def test_array_func_cudf_series(np_ar, func): ) @pytest.mark.parametrize( "func", - [lambda x: np.mean(x), lambda x: np.sum(x), lambda x: np.var(x, ddof=1)], + [ + lambda x: np.mean(x), + lambda x: np.sum(x), + lambda x: np.var(x, ddof=1), + lambda x: np.dot(x, x.transpose()), + ], ) def test_array_func_cudf_dataframe(pd_df, func): cudf_df = cudf.from_pandas(pd_df) @@ -60,7 +65,6 @@ def test_array_func_cudf_dataframe(pd_df, func): "func", [ lambda x: np.cov(x, x), - lambda x: np.dot(x, x), lambda x: np.linalg.norm(x), lambda x: np.linalg.det(x), ], @@ -74,7 +78,7 @@ def test_array_func_missing_cudf_dataframe(pd_df, func): # we only implement sum among all numpy non-ufuncs @pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason) @pytest.mark.parametrize("np_ar", [np.random.random(100)]) -@pytest.mark.parametrize("func", [lambda x: np.sum(x)]) +@pytest.mark.parametrize("func", [lambda x: np.sum(x), lambda x: np.dot(x, x)]) def test_array_func_cudf_index(np_ar, func): cudf_index = cudf.core.index.as_index(cudf.Series(np_ar)) expect = func(np_ar) @@ -88,7 +92,6 @@ def test_array_func_cudf_index(np_ar, func): "func", [ lambda x: np.cov(x, x), - lambda x: np.dot(x, x), lambda x: np.linalg.norm(x), lambda x: np.linalg.det(x), ], diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index abdac07d65d..f8063408e28 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -2915,3 +2915,43 @@ def test_empty_column(binop, data, scalar): expected = binop(pdf, scalar) utils.assert_eq(expected, got) + + +@pytest.mark.parametrize( + "df", + [ + cudf.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]), + pytest.param( + cudf.DataFrame([[1, None, None, 4], [5, 6, 7, None]]), + marks=pytest.mark.xfail( + reason="Cannot access Frame.values if frame contains nulls" + ), + ), + cudf.DataFrame([[1.2, 2.3, 3.4, 4.5], [5.6, 6.7, 7.8, 8.9]]), + cudf.Series([14, 15, 16, 17]), + cudf.Series([14.15, 15.16, 16.17, 17.18]), + ], +) +@pytest.mark.parametrize( + "other", + [ + cudf.DataFrame([[9, 10], [11, 12], [13, 14], [15, 16]]), + cudf.DataFrame( + [[9.4, 10.5], [11.6, 12.7], [13.8, 14.9], [15.1, 16.2]] + ), + cudf.Series([5, 6, 7, 8]), + cudf.Series([5.6, 6.7, 7.8, 8.9]), + pd.DataFrame([[9, 10], [11, 12], [13, 14], [15, 16]]), + pd.Series([5, 6, 7, 8]), + np.array([5, 6, 7, 8]), + [25.5, 26.6, 27.7, 28.8], + ], +) +def test_binops_dot(df, other): + pdf = df.to_pandas() + host_other = other.to_pandas() if hasattr(other, "to_pandas") else other + + expected = pdf @ host_other + got = df @ other + + utils.assert_eq(expected, got)