From aa5efe166a8057b7989aa3e0a78043adbe6e4b3d Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 12 Sep 2024 19:13:47 +0100 Subject: [PATCH] clean: Assorted cleanups (#958) --- .github/workflows/check_tpch_queries.yml | 2 +- README.md | 3 -- tpch/generate_data.py | 2 +- utils/check_api_reference.py | 65 +++++++++--------------- 4 files changed, 27 insertions(+), 45 deletions(-) diff --git a/.github/workflows/check_tpch_queries.yml b/.github/workflows/check_tpch_queries.yml index 619587eae..94cf101d5 100644 --- a/.github/workflows/check_tpch_queries.yml +++ b/.github/workflows/check_tpch_queries.yml @@ -27,4 +27,4 @@ jobs: - name: generate-data run: cd tpch && python generate_data.py - name: tpch-tests - run: cd tpch #&& pytest tests + run: cd tpch && pytest tests diff --git a/README.md b/README.md index 74630fd03..29623920a 100644 --- a/README.md +++ b/README.md @@ -95,9 +95,6 @@ See the [tutorial](https://narwhals-dev.github.io/narwhals/basics/dataframe/) fo If you said yes to both, we'd love to hear from you! -**Note**: You might suspect that this is a secret ploy to infiltrate the Polars API everywhere. -Indeed, you may suspect that. - ## Sponsors and institutional partners Narwhals is 100% independent, community-driven, and community-owned. diff --git a/tpch/generate_data.py b/tpch/generate_data.py index 4d5695dcf..5fd73b1f7 100644 --- a/tpch/generate_data.py +++ b/tpch/generate_data.py @@ -10,7 +10,7 @@ con = duckdb.connect(database=":memory:") con.execute("INSTALL tpch; LOAD tpch") -con.execute("CALL dbgen(sf=1)") +con.execute("CALL dbgen(sf=.5)") tables = [ "lineitem", "customer", diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index ec599def5..1bf1f086e 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -9,15 +9,30 @@ ret = 0 +NAMESPACES = {"dt", "str", "cat", "name"} +EXPR_ONLY_METHODS = {"over"} +SERIES_ONLY_METHODS = { + "to_arrow", + "to_dummies", + "to_pandas", + "to_list", + "to_numpy", + "dtype", + "name", + "shape", + "to_frame", + "is_empty", + "is_sorted", + "value_counts", + "zip_with", + "item", + "scatter", +} + # TODO(Unassigned): make dtypes reference page as well files = {remove_suffix(i, ".py") for i in os.listdir("narwhals")} top_level_functions = [ - i - for i in nw.__dir__() - if not i[0].isupper() - and i[0] != "_" - and i not in files - and i not in {"annotations", "DataFrame", "LazyFrame", "Series"} + i for i in nw.__dir__() if not i[0].isupper() and i[0] != "_" and i not in files ] with open("docs/api-reference/narwhals.md") as fd: content = fd.read() @@ -89,11 +104,7 @@ for i in content.splitlines() if i.startswith(" - ") and not i.startswith(" - _") ] -if ( - missing := set(top_level_functions) - .difference(documented) - .difference({"dt", "str", "cat", "name"}) -): +if missing := set(top_level_functions).difference(documented).difference(NAMESPACES): print("Series: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 @@ -112,11 +123,7 @@ for i in content.splitlines() if i.startswith(" - ") ] -if ( - missing := set(top_level_functions) - .difference(documented) - .difference({"cat", "str", "dt", "name"}) -): +if missing := set(top_level_functions).difference(documented).difference(NAMESPACES): print("Expr: not documented") # noqa: T201 print(missing) # noqa: T201 ret = 1 @@ -139,33 +146,11 @@ if not i[0].isupper() and i[0] != "_" ] -if missing := set(expr).difference(series).difference({"over"}): +if missing := set(expr).difference(series).difference(EXPR_ONLY_METHODS): print("In expr but not in series") # noqa: T201 print(missing) # noqa: T201 ret = 1 -if ( - extra := set(series) - .difference(expr) - .difference( - { - "to_arrow", - "to_dummies", - "to_pandas", - "to_list", - "to_numpy", - "dtype", - "name", - "shape", - "to_frame", - "is_empty", - "is_sorted", - "value_counts", - "zip_with", - "item", - "scatter", - } - ) -): +if extra := set(series).difference(expr).difference(SERIES_ONLY_METHODS): print("in series but not in expr") # noqa: T201 print(extra) # noqa: T201 ret = 1