Skip to content

Commit

Permalink
clean: Assorted cleanups (#958)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Sep 12, 2024
1 parent 8d4c658 commit aa5efe1
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check_tpch_queries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ jobs:
       - name: generate-data
         run: cd tpch && python generate_data.py
       - name: tpch-tests
-        run: cd tpch #&& pytest tests
+        run: cd tpch && pytest tests
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,6 @@ See the [tutorial](https://narwhals-dev.github.io/narwhals/basics/dataframe/) fo

If you said yes to both, we'd love to hear from you!

-**Note**: You might suspect that this is a secret ploy to infiltrate the Polars API everywhere.
-Indeed, you may suspect that.

## Sponsors and institutional partners

Narwhals is 100% independent, community-driven, and community-owned.
Expand Down
2 changes: 1 addition & 1 deletion tpch/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

con = duckdb.connect(database=":memory:")
con.execute("INSTALL tpch; LOAD tpch")
-con.execute("CALL dbgen(sf=1)")
+con.execute("CALL dbgen(sf=.5)")
tables = [
"lineitem",
"customer",
Expand Down
65 changes: 25 additions & 40 deletions utils/check_api_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,30 @@

ret = 0

+NAMESPACES = {"dt", "str", "cat", "name"}
+EXPR_ONLY_METHODS = {"over"}
+SERIES_ONLY_METHODS = {
+    "to_arrow",
+    "to_dummies",
+    "to_pandas",
+    "to_list",
+    "to_numpy",
+    "dtype",
+    "name",
+    "shape",
+    "to_frame",
+    "is_empty",
+    "is_sorted",
+    "value_counts",
+    "zip_with",
+    "item",
+    "scatter",
+}
+
# TODO(Unassigned): make dtypes reference page as well
files = {remove_suffix(i, ".py") for i in os.listdir("narwhals")}
top_level_functions = [
-    i
-    for i in nw.__dir__()
-    if not i[0].isupper()
-    and i[0] != "_"
-    and i not in files
-    and i not in {"annotations", "DataFrame", "LazyFrame", "Series"}
+    i for i in nw.__dir__() if not i[0].isupper() and i[0] != "_" and i not in files
]
with open("docs/api-reference/narwhals.md") as fd:
content = fd.read()
Expand Down Expand Up @@ -89,11 +104,7 @@
for i in content.splitlines()
if i.startswith(" - ") and not i.startswith(" - _")
]
-if (
-    missing := set(top_level_functions)
-    .difference(documented)
-    .difference({"dt", "str", "cat", "name"})
-):
+if missing := set(top_level_functions).difference(documented).difference(NAMESPACES):
print("Series: not documented") # noqa: T201
print(missing) # noqa: T201
ret = 1
Expand All @@ -112,11 +123,7 @@
for i in content.splitlines()
if i.startswith(" - ")
]
-if (
-    missing := set(top_level_functions)
-    .difference(documented)
-    .difference({"cat", "str", "dt", "name"})
-):
+if missing := set(top_level_functions).difference(documented).difference(NAMESPACES):
print("Expr: not documented") # noqa: T201
print(missing) # noqa: T201
ret = 1
Expand All @@ -139,33 +146,11 @@
if not i[0].isupper() and i[0] != "_"
]

-if missing := set(expr).difference(series).difference({"over"}):
+if missing := set(expr).difference(series).difference(EXPR_ONLY_METHODS):
print("In expr but not in series") # noqa: T201
print(missing) # noqa: T201
ret = 1
-if (
-    extra := set(series)
-    .difference(expr)
-    .difference(
-        {
-            "to_arrow",
-            "to_dummies",
-            "to_pandas",
-            "to_list",
-            "to_numpy",
-            "dtype",
-            "name",
-            "shape",
-            "to_frame",
-            "is_empty",
-            "is_sorted",
-            "value_counts",
-            "zip_with",
-            "item",
-            "scatter",
-        }
-    )
-):
+if extra := set(series).difference(expr).difference(SERIES_ONLY_METHODS):
print("in series but not in expr") # noqa: T201
print(extra) # noqa: T201
ret = 1
Expand Down

0 comments on commit aa5efe1

Please sign in to comment.