From f232ef1c369c877105f442f4ece4846b551d4c15 Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Thu, 19 Sep 2019 10:48:53 -0400 Subject: [PATCH 1/3] add series/dataframe notnull for dask compatibility, an alias for notna --- python/cudf/cudf/core/dataframe.py | 17 +++++++++++------ python/cudf/cudf/core/series.py | 5 +++++ python/cudf/cudf/tests/test_dataframe.py | 17 ++++++++++++++++- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c9b6a35c7cc..f90fdfc7116 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3063,20 +3063,25 @@ def _create_output_frame(data, percentiles=None): return output_frame - def isnull(self, **kwargs): + def isnull(self): """Identify missing values in a DataFrame. """ - return self._apply_support_method("isnull", **kwargs) + return self._apply_support_method("isnull") - def isna(self, **kwargs): + def isna(self): """Identify missing values in a DataFrame. Alias for isnull. """ - return self.isnull(**kwargs) + return self.isnull() - def notna(self, **kwargs): + def notna(self): """Identify non-missing values in a DataFrame. """ - return self._apply_support_method("notna", **kwargs) + return self._apply_support_method("notna") + + def notnull(self): + """Identify non-missing values in a DataFrame. Alias for notna. + """ + return self.notna() def to_pandas(self): """ diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d5f8f8e3523..0c7d086f01d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1287,6 +1287,11 @@ def notna(self): mask = cudautils.notna_mask(self.data, self.nullmask.mem) return Series(mask, name=self.name, index=self.index) + def notnull(self): + """Identify non-missing values in a Series. Alias for notna. + """ + return self.notna() + def nans_to_nulls(self): """ Convert nans (if any) to nulls diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index cfcf7e96ebc..016771d3a77 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -2527,7 +2527,7 @@ def test_isnull_isna(): assert_eq(ps.isna(), gs.isna()) -def test_notna(): +def test_notna_notnull(): # float & strings some missing ps = pd.DataFrame( { @@ -2538,12 +2538,16 @@ def test_notna(): gs = DataFrame.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) assert_eq(ps.a.notna(), gs.a.notna()) + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.a.notnull(), gs.a.notnull()) # integer & string none missing ps = pd.DataFrame({"a": [0, 1, 2, 3, 4], "b": ["a", "b", "u", "h", "d"]}) gs = DataFrame.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) assert_eq(ps.a.notna(), gs.a.notna()) + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.a.notnull(), gs.a.notnull()) # all missing ps = pd.DataFrame( @@ -2552,35 +2556,46 @@ def test_notna(): gs = DataFrame.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) assert_eq(ps.a.notna(), gs.a.notna()) + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.a.notnull(), gs.a.notnull()) # empty ps = pd.DataFrame({"a": []}) gs = DataFrame.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) assert_eq(ps.a.notna(), gs.a.notna()) + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.a.notnull(), gs.a.notnull()) # one missing ps = pd.DataFrame({"a": [np.nan], "b": [None]}) gs = DataFrame.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) assert_eq(ps.a.notna(), gs.a.notna()) + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.a.notnull(), gs.a.notnull()) # strings missing ps = pd.DataFrame({"a": ["a", "b", "c", None, "e"]}) gs = DataFrame.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) assert_eq(ps.a.notna(), gs.a.notna()) + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.a.notnull(), gs.a.notnull()) # strings none missing ps = pd.DataFrame({"a": ["a", "b", "c", "d", "e"]}) gs = DataFrame.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) assert_eq(ps.a.notna(), gs.a.notna()) + assert_eq(ps.notnull(), gs.notnull()) + assert_eq(ps.a.notnull(), gs.a.notnull()) # unnamed series ps = pd.Series([0, 1, 2, np.nan, 4, None, 6]) gs = Series.from_pandas(ps) assert_eq(ps.notna(), gs.notna()) + assert_eq(ps.notnull(), gs.notnull()) def test_ndim(): From 66d60995d3e6c1a17d0f5b8358394a9b4bb257ce Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Thu, 19 Sep 2019 10:53:16 -0400 Subject: [PATCH 2/3] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2eb580cd947..984a6000d51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - PR #2786 Add benchmarks option to root build.sh - PR #2773 Add Fisher's unbiased kurtosis and skew for Series/DataFrame - PR #2748 Parquet Reader: Add option to specify loading of PANDAS index +- PR #2844 ADd Series/DataFrame notnull ## Improvements From a0501f08b46d69acb4beb395a829346a5ad8d112 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Thu, 19 Sep 2019 12:45:58 -0400 Subject: [PATCH 3/3] Fix typo in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 984a6000d51..c223f28d258 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ - PR #2786 Add benchmarks option to root build.sh - PR #2773 Add Fisher's unbiased kurtosis and skew for Series/DataFrame - PR #2748 Parquet Reader: Add option to specify loading of PANDAS index -- PR #2844 ADd Series/DataFrame notnull +- PR #2844 Add Series/DataFrame notnull ## Improvements