From 539799d0c86ae7fa740eaea1926840731b41d60d Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 6 Dec 2024 12:46:44 -0800 Subject: [PATCH 01/13] SNOW-1805842: add plotly integ tests and interop doc Signed-off-by: Labanya Mukhopadhyay --- .../modin/supported/interoperability.rst | 46 +++++ .../interoperability/plotly/test_plotly.py | 188 ++++++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 docs/source/modin/supported/interoperability.rst create mode 100644 tests/integ/modin/interoperability/plotly/test_plotly.py diff --git a/docs/source/modin/supported/interoperability.rst b/docs/source/modin/supported/interoperability.rst new file mode 100644 index 00000000000..5b35cc19cea --- /dev/null +++ b/docs/source/modin/supported/interoperability.rst @@ -0,0 +1,46 @@ +Third Party Library Interoperable APIs +======================== + +The following table is structured as follows: The first column contains the API name. +The second column is a flag for whether or not interoperability is guaranteed with Snowpark pandas. + +.. note:: + ``Y`` stands for yes, i.e., interoperability is guaranteed with this API, and ``N`` stands for no. + +Plotly.express module APIs + ++-------------------------+---------------------------------------------+--------------------------------------------+ +| API name | Interoperable with Snowpark pandas? 
(Y/N) | Notes for current implementation | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``scatterplot`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``lineplot`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``area`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``timeline`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``violin`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``barplot`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``histogram`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``pie`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``treemap`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``sunburst`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``icicle`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``scatter_matrix`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``funnel`` | Y | | 
++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``density_heatmap`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``boxplot`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ +| ``imshow`` | Y | | ++-------------------------+---------------------------------------------+--------------------------------------------+ diff --git a/tests/integ/modin/interoperability/plotly/test_plotly.py b/tests/integ/modin/interoperability/plotly/test_plotly.py new file mode 100644 index 00000000000..91c620c435a --- /dev/null +++ b/tests/integ/modin/interoperability/plotly/test_plotly.py @@ -0,0 +1,188 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +import modin.pandas as pd +import numpy as np +import plotly.express as px +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.utils.sql_counter import sql_count_checker, SqlCounter + +# Integration tests for plotly.express module (https://plotly.com/python-api-reference/plotly.express.html). +# To add tests for additional APIs, +# - Call the API with Snowpark pandas and native pandas df input and get the JSON representation with +# `to_plotly_json()`. +# - Assert correctness of the plot produced using `assert_plotly_equal` function defined below. 
+ + +nsamps = 50 + + +def assert_plotly_equal(expect, got): + assert type(expect) == type(got) + if isinstance(expect, dict): + assert expect.keys() == got.keys() + for k in expect.keys(): + assert_plotly_equal(expect[k], got[k]) + elif isinstance(got, list): + assert len(expect) == len(got) + for i in range(len(expect)): + assert_plotly_equal(expect[i], got[i]) + elif isinstance(expect, np.ndarray): + if isinstance(expect[0], float): + np.testing.assert_allclose(expect, got) + else: + assert (expect == got).all() + else: + assert expect == got + + +@pytest.fixture(scope="module") +def df(): + rng = np.random.default_rng(42) + return pd.DataFrame( + { + "x": rng.random(nsamps), + "y": rng.random(nsamps), + "category": rng.integers(0, 5, nsamps), + "category2": rng.integers(0, 5, nsamps), + } + ) + + +@sql_count_checker(query_count=2) +def test_plotly_scatterplot(df): + snow_res = px.scatter(df, x="x", y="y").to_plotly_json() + native_res = px.scatter(df._to_pandas(), x="x", y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_lineplot(df): + snow_res = px.line(df, x="category", y="y").to_plotly_json() + native_res = px.line(df._to_pandas(), x="category", y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_area(df): + snow_res = px.area(df, x="category", y="y").to_plotly_json() + native_res = px.area(df._to_pandas(), x="category", y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_timeline(): + df = pd.DataFrame( + [ + dict(Task="Job A", Start="2009-01-01", Finish="2009-02-28"), + dict(Task="Job B", Start="2009-03-05", Finish="2009-04-15"), + dict(Task="Job C", Start="2009-02-20", Finish="2009-05-30"), + ] + ) + snow_res = px.timeline( + df, x_start="Start", x_end="Finish", y="Task" + ).to_plotly_json() + native_res = px.timeline( + df._to_pandas(), 
x_start="Start", x_end="Finish", y="Task" + ).to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_violin(df): + snow_res = px.violin(df, y="y").to_plotly_json() + native_res = px.violin(df._to_pandas(), y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_barplot(df): + snow_res = px.bar(df, x="category", y="y").to_plotly_json() + native_res = px.bar(df._to_pandas(), x="category", y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_histogram(df): + snow_res = px.histogram(df, x="category").to_plotly_json() + native_res = px.histogram(df._to_pandas(), x="category").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_pie(df): + snow_res = px.pie(df, values="category", names="category2").to_plotly_json() + native_res = px.pie( + df._to_pandas(), values="category", names="category2" + ).to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_treemap(df): + snow_res = px.treemap(df, names="category", values="y").to_plotly_json() + native_res = px.treemap( + df._to_pandas(), names="category", values="y" + ).to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_sunburst(df): + snow_res = px.sunburst(df, names="category", values="y").to_plotly_json() + native_res = px.sunburst( + df._to_pandas(), names="category", values="y" + ).to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_icicle(df): + snow_res = px.icicle(df, names="category", values="y").to_plotly_json() + native_res = px.icicle( + df._to_pandas(), names="category", values="y" + ).to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + 
+@sql_count_checker(query_count=2) +def test_plotly_scatter_matrix(df): + snow_res = px.scatter_matrix(df, dimensions=["category"]).to_plotly_json() + native_res = px.scatter_matrix( + df._to_pandas(), dimensions=["category"] + ).to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_funnel(df): + snow_res = px.funnel(df, x="x", y="y").to_plotly_json() + native_res = px.funnel(df._to_pandas(), x="x", y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_density_heatmap(df): + snow_res = px.density_heatmap(df, x="x", y="y").to_plotly_json() + native_res = px.density_heatmap(df._to_pandas(), x="x", y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +@sql_count_checker(query_count=2) +def test_plotly_boxplot(df): + snow_res = px.box(df, x="category", y="y").to_plotly_json() + native_res = px.box(df._to_pandas(), x="category", y="y").to_plotly_json() + assert_plotly_equal(snow_res, native_res) + + +def test_plotly_imshow(df): + df = pd.DataFrame([[1, 3], [4, 5], [7, 2]], columns=["a", "b"]) + with SqlCounter(query_count=4): + snow_res = px.imshow(df, x=df.columns, y=df.index).to_plotly_json() + native_res = px.imshow( + df._to_pandas(), x=df._to_pandas().columns, y=df._to_pandas().index + ).to_plotly_json() + assert_plotly_equal(snow_res, native_res) From 12ee8aab78f24b040d0ecfbc942faa85deabc78a Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 6 Dec 2024 12:53:57 -0800 Subject: [PATCH 02/13] update setup for plotly dependency Signed-off-by: Labanya Mukhopadhyay --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index a1be8a8eda7..ed8730940de 100644 --- a/setup.py +++ b/setup.py @@ -200,6 +200,7 @@ def run(self): "scipy", # Snowpark pandas 3rd party library testing "statsmodels", # Snowpark pandas 3rd party library testing "scikit-learn==1.5.2", # Snowpark pandas 
scikit-learn tests + "plotly", # Snowpark pandas 3rd party library testing ], "localtest": [ "pandas", From 6f27bbd360f7d3be8a9956aa13f9f625373d4db2 Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 6 Dec 2024 16:49:37 -0800 Subject: [PATCH 03/13] move and update doc Signed-off-by: Labanya Mukhopadhyay --- docs/source/modin/{supported => }/interoperability.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) rename docs/source/modin/{supported => }/interoperability.rst (95%) diff --git a/docs/source/modin/supported/interoperability.rst b/docs/source/modin/interoperability.rst similarity index 95% rename from docs/source/modin/supported/interoperability.rst rename to docs/source/modin/interoperability.rst index 5b35cc19cea..fd5cad385af 100644 --- a/docs/source/modin/supported/interoperability.rst +++ b/docs/source/modin/interoperability.rst @@ -1,5 +1,8 @@ Third Party Library Interoperable APIs -======================== +========================================= + +Snowpark pandas provides interoperability with third party libraries through the dataframe interchange protocol and +guarantees interoperability for select library APIs as listed below. The following table is structured as follows: The first column contains the API name. The second column is a flag for whether or not interoperability is guaranteed with Snowpark pandas. 
From 747964e6a43d399c2c19927062b916283ebc497f Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Tue, 10 Dec 2024 13:26:23 -0800 Subject: [PATCH 04/13] add interop doc to toctree Signed-off-by: Labanya Mukhopadhyay --- docs/source/modin/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/modin/index.rst b/docs/source/modin/index.rst index 99ea3881bd8..68e8ef7c8e9 100644 --- a/docs/source/modin/index.rst +++ b/docs/source/modin/index.rst @@ -19,5 +19,6 @@ For your convenience, here is all the :doc:`Supported APIs ` window groupby resampling + interoperability numpy performance \ No newline at end of file From c176fd22157014fbebb8c53f784cecd7101ae9b7 Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Thu, 12 Dec 2024 10:42:31 -0800 Subject: [PATCH 05/13] Apply doc change suggestions from code review Co-authored-by: Mahesh Vashishtha Co-authored-by: Hazem Elmeleegy --- docs/source/modin/interoperability.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/modin/interoperability.rst b/docs/source/modin/interoperability.rst index fd5cad385af..a163c3260ac 100644 --- a/docs/source/modin/interoperability.rst +++ b/docs/source/modin/interoperability.rst @@ -1,23 +1,23 @@ -Third Party Library Interoperable APIs +Interoperability with third party libraries ========================================= Snowpark pandas provides interoperability with third party libraries through the dataframe interchange protocol and guarantees interoperability for select library APIs as listed below. -The following table is structured as follows: The first column contains the API name. +The following table is structured as follows: The first column contains a method name. The second column is a flag for whether or not interoperability is guaranteed with Snowpark pandas. .. note:: ``Y`` stands for yes, i.e., interoperability is guaranteed with this API, and ``N`` stands for no. 
-Plotly.express module APIs +Plotly.express module methods +-------------------------+---------------------------------------------+--------------------------------------------+ -| API name | Interoperable with Snowpark pandas? (Y/N) | Notes for current implementation | +| Method name | Interoperable with Snowpark pandas? (Y/N) | Notes for current implementation | +-------------------------+---------------------------------------------+--------------------------------------------+ -| ``scatterplot`` | Y | | +| ``scatter`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ -| ``lineplot`` | Y | | +| ``line`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ | ``area`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ @@ -25,7 +25,7 @@ Plotly.express module APIs +-------------------------+---------------------------------------------+--------------------------------------------+ | ``violin`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ -| ``barplot`` | Y | | +| ``bar`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ | ``histogram`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ From fde180624424048f764a2b1f31910c8046bb0c58 Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Thu, 12 Dec 2024 11:48:26 -0800 Subject: [PATCH 06/13] cleanup doc and test file Signed-off-by: Labanya Mukhopadhyay --- docs/source/modin/interoperability.rst | 2 +- .../interoperability/plotly/test_plotly.py | 38 +++++++++---------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git 
a/docs/source/modin/interoperability.rst b/docs/source/modin/interoperability.rst index a163c3260ac..1de268e9e10 100644 --- a/docs/source/modin/interoperability.rst +++ b/docs/source/modin/interoperability.rst @@ -15,7 +15,7 @@ Plotly.express module methods +-------------------------+---------------------------------------------+--------------------------------------------+ | Method name | Interoperable with Snowpark pandas? (Y/N) | Notes for current implementation | +-------------------------+---------------------------------------------+--------------------------------------------+ -| ``scatter`` | Y | | +| ``scatter`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ | ``line`` | Y | | +-------------------------+---------------------------------------------+--------------------------------------------+ diff --git a/tests/integ/modin/interoperability/plotly/test_plotly.py b/tests/integ/modin/interoperability/plotly/test_plotly.py index 91c620c435a..74d29873e24 100644 --- a/tests/integ/modin/interoperability/plotly/test_plotly.py +++ b/tests/integ/modin/interoperability/plotly/test_plotly.py @@ -17,9 +17,6 @@ # - Assert correctness of the plot produced using `assert_plotly_equal` function defined below. 
-nsamps = 50 - - def assert_plotly_equal(expect, got): assert type(expect) == type(got) if isinstance(expect, dict): @@ -41,7 +38,8 @@ def assert_plotly_equal(expect, got): @pytest.fixture(scope="module") def df(): - rng = np.random.default_rng(42) + nsamps = 50 + rng = np.random.default_rng(seed=42) return pd.DataFrame( { "x": rng.random(nsamps), @@ -53,28 +51,28 @@ def df(): @sql_count_checker(query_count=2) -def test_plotly_scatterplot(df): +def test_scatter(df): snow_res = px.scatter(df, x="x", y="y").to_plotly_json() native_res = px.scatter(df._to_pandas(), x="x", y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_lineplot(df): +def test_line(df): snow_res = px.line(df, x="category", y="y").to_plotly_json() native_res = px.line(df._to_pandas(), x="category", y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_area(df): +def test_area(df): snow_res = px.area(df, x="category", y="y").to_plotly_json() native_res = px.area(df._to_pandas(), x="category", y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_timeline(): +def test_timeline(): df = pd.DataFrame( [ dict(Task="Job A", Start="2009-01-01", Finish="2009-02-28"), @@ -92,28 +90,28 @@ def test_plotly_timeline(): @sql_count_checker(query_count=2) -def test_plotly_violin(df): +def test_violin(df): snow_res = px.violin(df, y="y").to_plotly_json() native_res = px.violin(df._to_pandas(), y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_barplot(df): +def test_bar(df): snow_res = px.bar(df, x="category", y="y").to_plotly_json() native_res = px.bar(df._to_pandas(), x="category", y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_histogram(df): +def test_histogram(df): snow_res = px.histogram(df, 
x="category").to_plotly_json() native_res = px.histogram(df._to_pandas(), x="category").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_pie(df): +def test_pie(df): snow_res = px.pie(df, values="category", names="category2").to_plotly_json() native_res = px.pie( df._to_pandas(), values="category", names="category2" @@ -122,7 +120,7 @@ def test_plotly_pie(df): @sql_count_checker(query_count=2) -def test_plotly_treemap(df): +def test_treemap(df): snow_res = px.treemap(df, names="category", values="y").to_plotly_json() native_res = px.treemap( df._to_pandas(), names="category", values="y" @@ -131,7 +129,7 @@ def test_plotly_treemap(df): @sql_count_checker(query_count=2) -def test_plotly_sunburst(df): +def test_sunburst(df): snow_res = px.sunburst(df, names="category", values="y").to_plotly_json() native_res = px.sunburst( df._to_pandas(), names="category", values="y" @@ -140,7 +138,7 @@ def test_plotly_sunburst(df): @sql_count_checker(query_count=2) -def test_plotly_icicle(df): +def test_icicle(df): snow_res = px.icicle(df, names="category", values="y").to_plotly_json() native_res = px.icicle( df._to_pandas(), names="category", values="y" @@ -149,7 +147,7 @@ def test_plotly_icicle(df): @sql_count_checker(query_count=2) -def test_plotly_scatter_matrix(df): +def test_scatter_matrix(df): snow_res = px.scatter_matrix(df, dimensions=["category"]).to_plotly_json() native_res = px.scatter_matrix( df._to_pandas(), dimensions=["category"] @@ -158,27 +156,27 @@ def test_plotly_scatter_matrix(df): @sql_count_checker(query_count=2) -def test_plotly_funnel(df): +def test_funnel(df): snow_res = px.funnel(df, x="x", y="y").to_plotly_json() native_res = px.funnel(df._to_pandas(), x="x", y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_density_heatmap(df): +def test_density_heatmap(df): snow_res = px.density_heatmap(df, x="x", y="y").to_plotly_json() 
native_res = px.density_heatmap(df._to_pandas(), x="x", y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) @sql_count_checker(query_count=2) -def test_plotly_boxplot(df): +def test_box(df): snow_res = px.box(df, x="category", y="y").to_plotly_json() native_res = px.box(df._to_pandas(), x="category", y="y").to_plotly_json() assert_plotly_equal(snow_res, native_res) -def test_plotly_imshow(df): +def test_imshow(df): df = pd.DataFrame([[1, 3], [4, 5], [7, 2]], columns=["a", "b"]) with SqlCounter(query_count=4): snow_res = px.imshow(df, x=df.columns, y=df.index).to_plotly_json() From ee54d7b6206da9b06bdd8eb5ffb6884a2f5a5565 Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 13 Dec 2024 01:04:10 -0800 Subject: [PATCH 07/13] update doc and test changes from review Signed-off-by: Labanya Mukhopadhyay --- docs/source/modin/interoperability.rst | 14 +- .../interoperability/plotly/test_plotly.py | 295 ++++++++++-------- 2 files changed, 171 insertions(+), 138 deletions(-) diff --git a/docs/source/modin/interoperability.rst b/docs/source/modin/interoperability.rst index 1de268e9e10..6dc356bf31a 100644 --- a/docs/source/modin/interoperability.rst +++ b/docs/source/modin/interoperability.rst @@ -1,14 +1,20 @@ Interoperability with third party libraries ========================================= -Snowpark pandas provides interoperability with third party libraries through the dataframe interchange protocol and -guarantees interoperability for select library APIs as listed below. +Many third party libraries are interoperable with pandas, for example by accepting pandas dataframes objects as function +inputs. Here we have a non-exhaustive list of third party library use cases with pandas and note whether each method +works in Snowpark pandas as well. + +Snowpark pandas supports the `dataframe interchange protocol <https://data-apis.org/dataframe-protocol/latest/>`_, which +some libraries use to interoperate with Snowpark pandas to the same level of support as pandas. 
The following table is structured as follows: The first column contains a method name. -The second column is a flag for whether or not interoperability is guaranteed with Snowpark pandas. +The second column is a flag for whether or not interoperability is guaranteed with Snowpark pandas. For each of these +methods, we validate that passing in a Snowpark pandas dataframe as the dataframe input parameter behaves equivalently +to passing in a pandas dataframe. .. note:: - ``Y`` stands for yes, i.e., interoperability is guaranteed with this API, and ``N`` stands for no. + ``Y`` stands for yes, i.e., interoperability is guaranteed with this method, and ``N`` stands for no. Plotly.express module methods diff --git a/tests/integ/modin/interoperability/plotly/test_plotly.py b/tests/integ/modin/interoperability/plotly/test_plotly.py index 74d29873e24..24006dc7355 100644 --- a/tests/integ/modin/interoperability/plotly/test_plotly.py +++ b/tests/integ/modin/interoperability/plotly/test_plotly.py @@ -6,18 +6,24 @@ import numpy as np import plotly.express as px import pytest +import pandas as native_pd import snowflake.snowpark.modin.plugin # noqa: F401 -from tests.integ.utils.sql_counter import sql_count_checker, SqlCounter +from tests.integ.utils.sql_counter import sql_count_checker +from tests.integ.modin.utils import eval_snowpark_pandas_result # Integration tests for plotly.express module (https://plotly.com/python-api-reference/plotly.express.html). # To add tests for additional APIs, -# - Call the API with Snowpark pandas and native pandas df input and get the JSON representation with +# - Call the method with Snowpark pandas and native pandas df input and get the JSON representation with # `to_plotly_json()`. # - Assert correctness of the plot produced using `assert_plotly_equal` function defined below. 
def assert_plotly_equal(expect, got): + # referenced from cudf plotly integration test + # https://github.com/rapidsai/cudf/blob/main/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/ + # test_plotly.py#L10 + assert type(expect) == type(got) if isinstance(expect, dict): assert expect.keys() == got.keys() @@ -36,151 +42,172 @@ def assert_plotly_equal(expect, got): assert expect == got -@pytest.fixture(scope="module") -def df(): +@pytest.fixture() +def test_dfs(): nsamps = 50 rng = np.random.default_rng(seed=42) - return pd.DataFrame( - { - "x": rng.random(nsamps), - "y": rng.random(nsamps), - "category": rng.integers(0, 5, nsamps), - "category2": rng.integers(0, 5, nsamps), - } + data = { + "x": rng.random(nsamps), + "y": rng.random(nsamps), + "category": rng.integers(0, 5, nsamps), + "category2": rng.integers(0, 5, nsamps), + } + snow_df = pd.DataFrame(data) + native_df = native_pd.DataFrame(data) + return snow_df, native_df + + +@sql_count_checker(query_count=1) +def test_scatter(test_dfs): + # test_dfs = dfs() + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.scatter(df, x="x", y="y").to_plotly_json(), + comparator=assert_plotly_equal ) -@sql_count_checker(query_count=2) -def test_scatter(df): - snow_res = px.scatter(df, x="x", y="y").to_plotly_json() - native_res = px.scatter(df._to_pandas(), x="x", y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_line(df): - snow_res = px.line(df, x="category", y="y").to_plotly_json() - native_res = px.line(df._to_pandas(), x="category", y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) +@sql_count_checker(query_count=1) +def test_line(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.line(df, x="category", y="y").to_plotly_json(), + comparator=assert_plotly_equal + ) -@sql_count_checker(query_count=2) -def test_area(df): - snow_res = px.area(df, x="category", y="y").to_plotly_json() - native_res 
= px.area(df._to_pandas(), x="category", y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) +@sql_count_checker(query_count=1) +def test_area(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.area(df, x="category", y="y").to_plotly_json(), + comparator=assert_plotly_equal + ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=1) def test_timeline(): - df = pd.DataFrame( + native_df = native_pd.DataFrame( [ dict(Task="Job A", Start="2009-01-01", Finish="2009-02-28"), dict(Task="Job B", Start="2009-03-05", Finish="2009-04-15"), dict(Task="Job C", Start="2009-02-20", Finish="2009-05-30"), ] ) - snow_res = px.timeline( - df, x_start="Start", x_end="Finish", y="Task" - ).to_plotly_json() - native_res = px.timeline( - df._to_pandas(), x_start="Start", x_end="Finish", y="Task" - ).to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_violin(df): - snow_res = px.violin(df, y="y").to_plotly_json() - native_res = px.violin(df._to_pandas(), y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_bar(df): - snow_res = px.bar(df, x="category", y="y").to_plotly_json() - native_res = px.bar(df._to_pandas(), x="category", y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_histogram(df): - snow_res = px.histogram(df, x="category").to_plotly_json() - native_res = px.histogram(df._to_pandas(), x="category").to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_pie(df): - snow_res = px.pie(df, values="category", names="category2").to_plotly_json() - native_res = px.pie( - df._to_pandas(), values="category", names="category2" - ).to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_treemap(df): - snow_res = px.treemap(df, names="category", 
values="y").to_plotly_json() - native_res = px.treemap( - df._to_pandas(), names="category", values="y" - ).to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_sunburst(df): - snow_res = px.sunburst(df, names="category", values="y").to_plotly_json() - native_res = px.sunburst( - df._to_pandas(), names="category", values="y" - ).to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_icicle(df): - snow_res = px.icicle(df, names="category", values="y").to_plotly_json() - native_res = px.icicle( - df._to_pandas(), names="category", values="y" - ).to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_scatter_matrix(df): - snow_res = px.scatter_matrix(df, dimensions=["category"]).to_plotly_json() - native_res = px.scatter_matrix( - df._to_pandas(), dimensions=["category"] - ).to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_funnel(df): - snow_res = px.funnel(df, x="x", y="y").to_plotly_json() - native_res = px.funnel(df._to_pandas(), x="x", y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_density_heatmap(df): - snow_res = px.density_heatmap(df, x="x", y="y").to_plotly_json() - native_res = px.density_heatmap(df._to_pandas(), x="x", y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -@sql_count_checker(query_count=2) -def test_box(df): - snow_res = px.box(df, x="category", y="y").to_plotly_json() - native_res = px.box(df._to_pandas(), x="category", y="y").to_plotly_json() - assert_plotly_equal(snow_res, native_res) - - -def test_imshow(df): - df = pd.DataFrame([[1, 3], [4, 5], [7, 2]], columns=["a", "b"]) - with SqlCounter(query_count=4): - snow_res = px.imshow(df, x=df.columns, y=df.index).to_plotly_json() - native_res = px.imshow( - df._to_pandas(), 
x=df._to_pandas().columns, y=df._to_pandas().index - ).to_plotly_json() - assert_plotly_equal(snow_res, native_res) + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: px.timeline( + df, x_start="Start", x_end="Finish", y="Task" + ).to_plotly_json(), + comparator=assert_plotly_equal, + ) + + +@sql_count_checker(query_count=1) +def test_violin(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.violin(df, y="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_bar(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.bar(df, x="category", y="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_histogram(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.histogram(df, x="category").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_pie(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.pie(df, values="category", names="category2").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_treemap(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.treemap(df, names="category", values="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_sunburst(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.sunburst(df, names="category", values="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_icicle(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.icicle(df, names="category", values="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_scatter_matrix(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: 
px.scatter_matrix(df, dimensions=["category"]).to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_funnel(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.funnel(df, x="x", y="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_density_heatmap(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.density_heatmap(df, x="x", y="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=1) +def test_box(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.box(df, x="category", y="y").to_plotly_json(), + comparator=assert_plotly_equal + ) + + +@sql_count_checker(query_count=4) +def test_imshow(test_dfs): + eval_snowpark_pandas_result( + *test_dfs, + lambda df: px.imshow(df, x=df.columns, y=df.index).to_plotly_json(), + comparator=assert_plotly_equal + ) From 85d31a5fbbcc4c3c09145280d2268c100fd11eac Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 13 Dec 2024 01:13:51 -0800 Subject: [PATCH 08/13] fix doc title Signed-off-by: Labanya Mukhopadhyay --- docs/source/modin/interoperability.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/modin/interoperability.rst b/docs/source/modin/interoperability.rst index 6dc356bf31a..f7d52d2b49a 100644 --- a/docs/source/modin/interoperability.rst +++ b/docs/source/modin/interoperability.rst @@ -1,5 +1,5 @@ Interoperability with third party libraries -========================================= +============================================= Many third party libraries are interoperable with pandas, for example by accepting pandas dataframes objects as function inputs. 
Here we have a non-exhaustive list of third party library use cases with pandas and note whether each method From 4c71489e2e122caa630ad64b44bcb5d0441ff80d Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 13 Dec 2024 08:57:27 -0800 Subject: [PATCH 09/13] limit plotly version Signed-off-by: Labanya Mukhopadhyay --- docs/source/modin/interoperability.rst | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/modin/interoperability.rst b/docs/source/modin/interoperability.rst index f7d52d2b49a..1c0e846fadf 100644 --- a/docs/source/modin/interoperability.rst +++ b/docs/source/modin/interoperability.rst @@ -18,6 +18,9 @@ to passing in a pandas dataframe. Plotly.express module methods +.. note:: + Currently only plotly versions <6.0.0 are supported through the dataframe interchange protocol. + +-------------------------+---------------------------------------------+--------------------------------------------+ | Method name | Interoperable with Snowpark pandas? 
(Y/N) | Notes for current implementation | +-------------------------+---------------------------------------------+--------------------------------------------+ diff --git a/setup.py b/setup.py index ed8730940de..77d5eafe53b 100644 --- a/setup.py +++ b/setup.py @@ -200,7 +200,7 @@ def run(self): "scipy", # Snowpark pandas 3rd party library testing "statsmodels", # Snowpark pandas 3rd party library testing "scikit-learn==1.5.2", # Snowpark pandas scikit-learn tests - "plotly", # Snowpark pandas 3rd party library testing + "plotly<6.0.0", # Snowpark pandas 3rd party library testing ], "localtest": [ "pandas", From f56ddc56540d7267b8c6ec9ba53002656ce593de Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 13 Dec 2024 13:13:17 -0800 Subject: [PATCH 10/13] review changes Signed-off-by: Labanya Mukhopadhyay --- setup.py | 1 + tests/integ/modin/interoperability/plotly/test_plotly.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 77d5eafe53b..f313fec54ed 100644 --- a/setup.py +++ b/setup.py @@ -200,6 +200,7 @@ def run(self): "scipy", # Snowpark pandas 3rd party library testing "statsmodels", # Snowpark pandas 3rd party library testing "scikit-learn==1.5.2", # Snowpark pandas scikit-learn tests + # TODO: SNOW-1855330 - plotly version restricted to <6.0.0.0 due to unsupported `from_dict` "plotly<6.0.0", # Snowpark pandas 3rd party library testing ], "localtest": [ diff --git a/tests/integ/modin/interoperability/plotly/test_plotly.py b/tests/integ/modin/interoperability/plotly/test_plotly.py index 24006dc7355..2dc9ae59d55 100644 --- a/tests/integ/modin/interoperability/plotly/test_plotly.py +++ b/tests/integ/modin/interoperability/plotly/test_plotly.py @@ -21,8 +21,7 @@ def assert_plotly_equal(expect, got): # referenced from cudf plotly integration test - # https://github.com/rapidsai/cudf/blob/main/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/ - # test_plotly.py#L10 + # 
https://github.com/rapidsai/cudf/blob/main/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py#L10 assert type(expect) == type(got) if isinstance(expect, dict): @@ -59,7 +58,6 @@ def test_dfs(): @sql_count_checker(query_count=1) def test_scatter(test_dfs): - # test_dfs = dfs() eval_snowpark_pandas_result( *test_dfs, lambda df: px.scatter(df, x="x", y="y").to_plotly_json(), From 68fb6a53f1f16ebd0cd5d378f615fcf1c9480119 Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 13 Dec 2024 13:14:40 -0800 Subject: [PATCH 11/13] change setup comment Signed-off-by: Labanya Mukhopadhyay --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f313fec54ed..3705145708d 100644 --- a/setup.py +++ b/setup.py @@ -200,7 +200,7 @@ def run(self): "scipy", # Snowpark pandas 3rd party library testing "statsmodels", # Snowpark pandas 3rd party library testing "scikit-learn==1.5.2", # Snowpark pandas scikit-learn tests - # TODO: SNOW-1855330 - plotly version restricted to <6.0.0.0 due to unsupported `from_dict` + # plotly version restricted to <6.0.0.0 due to unsupported `from_dict` (SNOW-1855330) "plotly<6.0.0", # Snowpark pandas 3rd party library testing ], "localtest": [ From afdacecc40bffa765a69bc50cf36ad1751022cab Mon Sep 17 00:00:00 2001 From: Mahesh Vashishtha Date: Fri, 13 Dec 2024 14:07:28 -0800 Subject: [PATCH 12/13] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3705145708d..90b0212d9f0 100644 --- a/setup.py +++ b/setup.py @@ -200,7 +200,7 @@ def run(self): "scipy", # Snowpark pandas 3rd party library testing "statsmodels", # Snowpark pandas 3rd party library testing "scikit-learn==1.5.2", # Snowpark pandas scikit-learn tests - # plotly version restricted to <6.0.0.0 due to unsupported `from_dict` (SNOW-1855330) + # plotly version restricted to <6.0.0 due to unsupported `from_dict` (SNOW-1855330) "plotly<6.0.0", # 
Snowpark pandas 3rd party library testing ], "localtest": [ From 0dcd3e461b8b4c8cc19941b0571d1eaf8395f8a4 Mon Sep 17 00:00:00 2001 From: Labanya Mukhopadhyay Date: Fri, 13 Dec 2024 15:14:30 -0800 Subject: [PATCH 13/13] update setup comment Signed-off-by: Labanya Mukhopadhyay --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 90b0212d9f0..12e67ab3393 100644 --- a/setup.py +++ b/setup.py @@ -200,7 +200,7 @@ def run(self): "scipy", # Snowpark pandas 3rd party library testing "statsmodels", # Snowpark pandas 3rd party library testing "scikit-learn==1.5.2", # Snowpark pandas scikit-learn tests - # plotly version restricted to <6.0.0 due to unsupported `from_dict` (SNOW-1855330) + # plotly version restricted due to foreseen change in query counts in version 6.0.0+ "plotly<6.0.0", # Snowpark pandas 3rd party library testing ], "localtest": [