From 9840bd26977e586193c4c66f854da60e50fb9233 Mon Sep 17 00:00:00 2001 From: Amanda Potts Date: Fri, 8 Mar 2024 16:18:15 -0500 Subject: [PATCH 1/2] Closes #3023 to_markdown --- PROTO_tests/tests/dataframe_test.py | 28 +++++++++++++ PROTO_tests/tests/series_test.py | 50 +++++++++++++++++++++++ arkouda/dataframe.py | 60 +++++++++++++++++++++++++++ arkouda/series.py | 63 +++++++++++++++++++++++++++++ tests/dataframe_test.py | 31 ++++++++++++++ tests/series_test.py | 52 ++++++++++++++++++++++++ 6 files changed, 284 insertions(+) diff --git a/PROTO_tests/tests/dataframe_test.py b/PROTO_tests/tests/dataframe_test.py index 4ba7bb27ad..a3d7b793d3 100644 --- a/PROTO_tests/tests/dataframe_test.py +++ b/PROTO_tests/tests/dataframe_test.py @@ -1015,6 +1015,34 @@ def test_memory_usage(self): ) assert_series_equal(ak_memory_usage.to_pandas(), pd_memory_usage) + def test_to_markdown(self): + df = ak.DataFrame({"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]}) + assert ( + df.to_markdown() == "+----+------------+------------+\n" + "| | animal_1 | animal_2 |\n" + "+====+============+============+\n" + "| 0 | elk | dog |\n" + "+----+------------+------------+\n" + "| 1 | pig | quetzal |\n" + "+----+------------+------------+" + ) + + assert ( + df.to_markdown(index=False) == "+------------+------------+\n" + "| animal_1 | animal_2 |\n" + "+============+============+\n" + "| elk | dog |\n" + "+------------+------------+\n" + "| pig | quetzal |\n" + "+------------+------------+" + ) + + assert df.to_markdown(tablefmt="grid") == df.to_pandas().to_markdown(tablefmt="grid") + assert df.to_markdown(tablefmt="grid", index=False) == df.to_pandas().to_markdown( + tablefmt="grid", index=False + ) + assert df.to_markdown(tablefmt="jira") == df.to_pandas().to_markdown(tablefmt="jira") + def pda_to_str_helper(pda): return ak.array([f"str {i}" for i in pda.to_list()]) diff --git a/PROTO_tests/tests/series_test.py b/PROTO_tests/tests/series_test.py index d5f684ffe8..3433d61dea 100644 
--- a/PROTO_tests/tests/series_test.py +++ b/PROTO_tests/tests/series_test.py @@ -244,3 +244,53 @@ def test_map(self): result = c.map({1.0: "a", 2.2: "b", 4.4: "c", 5.0: "d", 6.0: "e"}) assert result.index.values.to_list() == [5, 4, 2, 3, 1] assert result.values.to_list() == ["a", "a", "b", "b", "c"] + + def test_to_markdown(self): + s = ak.Series(["elk", "pig", "dog", "quetzal"], name="animal") + assert ( + s.to_markdown() == "+----+----------+\n" + "| | animal |\n" + "+====+==========+\n" + "| 0 | elk |\n" + "+----+----------+\n" + "| 1 | pig |\n" + "+----+----------+\n" + "| 2 | dog |\n" + "+----+----------+\n" + "| 3 | quetzal |\n" + "+----+----------+" + ) + + assert ( + s.to_markdown(index=False) == "+----------+\n" + "| animal |\n" + "+==========+\n" + "| elk |\n" + "+----------+\n" + "| pig |\n" + "+----------+\n" + "| dog |\n" + "+----------+\n" + "| quetzal |\n" + "+----------+" + ) + + assert ( + s.to_markdown(tablefmt="grid") == "+----+----------+\n" + "| | animal |\n" + "+====+==========+\n" + "| 0 | elk |\n" + "+----+----------+\n" + "| 1 | pig |\n" + "+----+----------+\n" + "| 2 | dog |\n" + "+----+----------+\n" + "| 3 | quetzal |\n" + "+----+----------+" + ) + + assert s.to_markdown(tablefmt="grid") == s.to_pandas().to_markdown(tablefmt="grid") + assert s.to_markdown(tablefmt="grid", index=False) == s.to_pandas().to_markdown( + tablefmt="grid", index=False + ) + assert s.to_markdown(tablefmt="jira") == s.to_pandas().to_markdown(tablefmt="jira") diff --git a/arkouda/dataframe.py b/arkouda/dataframe.py index 24b959489f..829eae8334 100644 --- a/arkouda/dataframe.py +++ b/arkouda/dataframe.py @@ -2681,6 +2681,66 @@ def to_pandas(self, datalimit=maxTransferBytes, retain_index=False): else: return pd.DataFrame(data=pandas_data) + def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=None, **kwargs): + r""" + Print Series in Markdown-friendly format. 
+ + Parameters + ---------- + mode : str, optional + Mode in which file is opened, "wt" by default. + index : bool, optional, default True + Add index (row) labels. + tablefmt: str = "grid" + Table format to call from tabulate: + https://pypi.org/project/tabulate/ + storage_options: dict, optional + Extra options that make sense for a particular storage connection, + e.g. host, port, username, password, etc., if using a URL that will be parsed by fsspec, + e.g., starting “s3://”, “gcs://”. + An error will be raised if providing this argument with a non-fsspec URL. + See the fsspec and backend storage implementation docs for the set + of allowed keys and values. + **kwargs + These parameters will be passed to tabulate. + + Note + ---- + This function calls pandas.DataFrame.to_markdown: + https://pandas.pydata.org/pandas-docs/version/1.2.4/reference/api/pandas.DataFrame.to_markdown.html + + Examples + -------- + + >>> import arkouda as ak + >>> ak.connect() + >>> df = ak.DataFrame({"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]}) + >>> print(df.to_markdown()) + +----+------------+------------+ + | | animal_1 | animal_2 | + +====+============+============+ + | 0 | elk | dog | + +----+------------+------------+ + | 1 | pig | quetzal | + +----+------------+------------+ + + + Suppress the index: + + >>> print(df.to_markdown(index = False)) + +------------+------------+ + | animal_1 | animal_2 | + +============+============+ + | elk | dog | + +------------+------------+ + | pig | quetzal | + +------------+------------+ + + """ + return self.to_pandas().to_markdown( + mode=mode, index=index, tablefmt=tablefmt, storage_options=storage_options, **kwargs + ) + def _prep_data(self, index=False, columns=None): # if no columns are stored, we will save all columns if columns is None: diff --git a/arkouda/series.py b/arkouda/series.py index 91e9fced06..ef246f5950 100644 --- a/arkouda/series.py +++ b/arkouda/series.py @@ -713,6 +713,69 @@ def to_pandas(self) ->
pd.Series: else: return pd.Series(val.to_ndarray(), index=idx) + def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=None, **kwargs): + r""" + Print Series in Markdown-friendly format. + + Parameters + ---------- + mode : str, optional + Mode in which file is opened, "wt" by default. + index : bool, optional, default True + Add index (row) labels. + tablefmt: str = "grid" + Table format to call from tabulate: + https://pypi.org/project/tabulate/ + storage_options: dict, optional + Extra options that make sense for a particular storage connection, + e.g. host, port, username, password, etc., if using a URL that will be parsed by fsspec, + e.g., starting “s3://”, “gcs://”. + An error will be raised if providing this argument with a non-fsspec URL. + See the fsspec and backend storage implementation docs for the set + of allowed keys and values. + + **kwargs + These parameters will be passed to tabulate. + + Note + ---- + This function calls pandas.Series.to_markdown: + https://pandas.pydata.org/pandas-docs/version/1.2.4/reference/api/pandas.DataFrame.to_markdown.html + + Examples + -------- + + >>> import arkouda as ak + >>> ak.connect() + >>> s = ak.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown(tablefmt="pipe")) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + + Output markdown with a tabulate option.
+ + >>> print(s.to_markdown(tablefmt="grid")) + +----+----------+ + | | animal | + +====+==========+ + | 0 | elk | + +----+----------+ + | 1 | pig | + +----+----------+ + | 2 | dog | + +----+----------+ + | 3 | quetzal | + +----+----------+ + + """ + return self.to_pandas().to_markdown( + mode=mode, index=index, tablefmt=tablefmt, storage_options=storage_options, **kwargs + ) + @typechecked() def to_list(self) -> list: p = self.to_pandas() diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py index 26c37af627..719f2f7483 100644 --- a/tests/dataframe_test.py +++ b/tests/dataframe_test.py @@ -1398,6 +1398,37 @@ def test_multi_col_merge(self): # assert_frame_equal(sorted_ak.to_pandas()[sorted_columns], # sorted_pd[sorted_columns]) + def test_to_markdown(self): + df = ak.DataFrame({"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]}) + self.assertEqual( + df.to_markdown(), + "+----+------------+------------+\n" + "| | animal_1 | animal_2 |\n" + "+====+============+============+\n" + "| 0 | elk | dog |\n" + "+----+------------+------------+\n" + "| 1 | pig | quetzal |\n" + "+----+------------+------------+", + ) + + self.assertEqual( + df.to_markdown(index=False), + "+------------+------------+\n" + "| animal_1 | animal_2 |\n" + "+============+============+\n" + "| elk | dog |\n" + "+------------+------------+\n" + "| pig | quetzal |\n" + "+------------+------------+", + ) + + self.assertEqual(df.to_markdown(tablefmt="grid"), df.to_pandas().to_markdown(tablefmt="grid")) + self.assertEqual( + df.to_markdown(tablefmt="grid", index=False), + df.to_pandas().to_markdown(tablefmt="grid", index=False), + ) + self.assertEqual(df.to_markdown(tablefmt="jira"), df.to_pandas().to_markdown(tablefmt="jira")) + def pda_to_str_helper(pda): return ak.array([f"str {i}" for i in pda.to_list()]) diff --git a/tests/series_test.py b/tests/series_test.py index 14f880eec7..3f16007691 100644 --- a/tests/series_test.py +++ b/tests/series_test.py @@ -101,6 +101,58 @@ def 
test_shape(self): (l,) = s.shape self.assertEqual(l, 3) + def test_to_markdown(self): + s = ak.Series(["elk", "pig", "dog", "quetzal"], name="animal") + self.assertEqual( + s.to_markdown(), + "+----+----------+\n" + "| | animal |\n" + "+====+==========+\n" + "| 0 | elk |\n" + "+----+----------+\n" + "| 1 | pig |\n" + "+----+----------+\n" + "| 2 | dog |\n" + "+----+----------+\n" + "| 3 | quetzal |\n" + "+----+----------+", + ) + self.assertEqual( + s.to_markdown(index=False), + "+----------+\n" + "| animal |\n" + "+==========+\n" + "| elk |\n" + "+----------+\n" + "| pig |\n" + "+----------+\n" + "| dog |\n" + "+----------+\n" + "| quetzal |\n" + "+----------+", + ) + self.assertEqual( + s.to_markdown(tablefmt="grid"), + "+----+----------+\n" + "| | animal |\n" + "+====+==========+\n" + "| 0 | elk |\n" + "+----+----------+\n" + "| 1 | pig |\n" + "+----+----------+\n" + "| 2 | dog |\n" + "+----+----------+\n" + "| 3 | quetzal |\n" + "+----+----------+", + ) + + self.assertEqual(s.to_markdown(tablefmt="grid"), s.to_pandas().to_markdown(tablefmt="grid")) + self.assertEqual( + s.to_markdown(tablefmt="grid", index=False), + s.to_pandas().to_markdown(tablefmt="grid", index=False), + ) + self.assertEqual(s.to_markdown(tablefmt="jira"), s.to_pandas().to_markdown(tablefmt="jira")) + def test_add(self): i = ak.arange(3) v = ak.arange(3, 6, 1) From 022c23038fec599676a1ec9ec7cc83f31d4582c5 Mon Sep 17 00:00:00 2001 From: Amanda Potts Date: Mon, 11 Mar 2024 17:11:33 -0400 Subject: [PATCH 2/2] Minor doc string changes in response to code review. 
--- arkouda/dataframe.py | 4 ++-- arkouda/series.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arkouda/dataframe.py b/arkouda/dataframe.py index 829eae8334..59b74b27a3 100644 --- a/arkouda/dataframe.py +++ b/arkouda/dataframe.py @@ -2683,7 +2683,7 @@ def to_pandas(self, datalimit=maxTransferBytes, retain_index=False): def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=None, **kwargs): r""" - Print Series in Markdown-friendly format. + Print DataFrame in Markdown-friendly format. Parameters ---------- @@ -2706,7 +2706,7 @@ def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=No Note ---- - This function calls pandas.DataFrame.to_markdown: + This function should only be called on small DataFrames as it calls pandas.DataFrame.to_markdown: https://pandas.pydata.org/pandas-docs/version/1.2.4/reference/api/pandas.DataFrame.to_markdown.html Examples diff --git a/arkouda/series.py b/arkouda/series.py index ef246f5950..19fb26501c 100644 --- a/arkouda/series.py +++ b/arkouda/series.py @@ -739,8 +739,8 @@ def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=No Note ---- - This function calls pandas.Series.to_markdown: - https://pandas.pydata.org/pandas-docs/version/1.2.4/reference/api/pandas.DataFrame.to_markdown.html + This function should only be called on small Series as it calls pandas.Series.to_markdown: + https://pandas.pydata.org/docs/reference/api/pandas.Series.to_markdown.html Examples --------