From 59ec80b50e342e92e4f5ecfcb17a6bfef6764462 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 8 Mar 2021 14:15:22 -0500 Subject: [PATCH 1/2] Add repr for Column and ColumnAccessor --- python/cudf/cudf/core/column/column.py | 7 +++++++ python/cudf/cudf/core/column_accessor.py | 22 ++++++++++++++-------- python/cudf/cudf/utils/cudautils.py | 5 +++-- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7e7b39816d8..2bb35c97d7c 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -109,6 +109,13 @@ def mask_array_view(self) -> "cuda.devicearray.DeviceNDArray": def __len__(self) -> int: return self.size + def __repr__(self): + return ( + f"{object.__repr__(self)}\n" + f"{self.to_arrow().to_string()}\n" + f"dtype: {self.dtype}" + ) + def to_pandas( self, index: ColumnLike = None, nullable: bool = False, **kwargs ) -> "pd.Series": diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index ad1a0c80ef5..95ffb7d9d8e 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -16,6 +16,7 @@ ) import pandas as pd +import pyarrow as pa import cudf from cudf.utils.utils import ( @@ -86,15 +87,20 @@ def __len__(self) -> int: return len(self._data) def __repr__(self) -> str: - data_repr = self._data.__repr__() - multiindex_repr = self.multiindex.__repr__() - level_names_repr = self.level_names.__repr__() - return "{}({}, multiindex={}, level_names={})".format( - self.__class__.__name__, - data_repr, - multiindex_repr, - level_names_repr, + type_info = ( + f"{self.__class__.__name__}(" + f"multiindex={self.multiindex}, " + f"level_names={self.level_names})" ) + column_info = "\n".join( + [f"{name}: {col.dtype}" for name, col in self.items()] + ) + return f"{type_info}\n{column_info}" + + def _to_arrow_table(self): + arrays = [col.to_arrow() for col in self.values()] + names = list(self.keys()) + return pa.Table.from_arrays(arrays, names) @property def level_names(self) -> Tuple[Any, ...]: diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index f62ca862091..722e0b12183 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -1,9 +1,10 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. +from pickle import dumps + import cachetools import cupy import numpy as np from numba import cuda -from pickle import dumps import cudf from cudf.utils.utils import check_equals_float, check_equals_int @@ -239,7 +240,7 @@ def grouped_window_sizes_from_offset(arr, group_starts, offset): # it can hit for distinct functions that are similar. The lru_cache wrapping # compile_udf misses for these similar functions, but doesn't need to serialize # closure variables to check for a hit. -_udf_code_cache = cachetools.LRUCache(maxsize=32) +_udf_code_cache: cachetools.LRUCache = cachetools.LRUCache(maxsize=32) def compile_udf(udf, type_signature): From 0afb9a2f7ba2a785e68885215f51175b267b2540 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 8 Mar 2021 14:28:29 -0500 Subject: [PATCH 2/2] Don't need to_arrow_table --- python/cudf/cudf/core/column_accessor.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 95ffb7d9d8e..03743e4464b 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -16,7 +16,6 @@ ) import pandas as pd -import pyarrow as pa import cudf from cudf.utils.utils import ( @@ -97,11 +96,6 @@ def __repr__(self) -> str: ) return f"{type_info}\n{column_info}" - def _to_arrow_table(self): - arrays = [col.to_arrow() for col in self.values()] - names = list(self.keys()) - return pa.Table.from_arrays(arrays, names) - @property def level_names(self) -> Tuple[Any, ...]: if self._level_names is None or len(self._level_names) == 0: