From 9a8d59e76c20d6ce0acc0e5f616c5fe78e7f403d Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Mon, 14 Oct 2024 11:14:28 +0000 Subject: [PATCH] Update overview docs --- python/cudf_polars/docs/overview.md | 34 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 97803d569df..967fbf95ea0 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -272,8 +272,10 @@ Suppose we want a rewrite rule (`rewrite`) between expressions function `rewrite` with type `Expr -> (Expr -> T) -> T`: ```python +from cudf_polars.typing import GenericTransformer + @singledispatch -def rewrite(e: Expr, rec: Callable[[Expr], T]) -> T: +def rewrite(e: Expr, rec: GenericTransformer[Expr, T]) -> T: ... ``` @@ -289,10 +291,12 @@ recursion. To this end, we have two utilities in `traversal.py`: - `make_recursive` and - `CachingVisitor`. -Both of these can be wrapped around a transformation function like -`rewrite` to provide a function `Expr -> T`. We can also attach -arbitrary state to the objects they return, which `rewrite` can -inspect. `make_recursive` is very simple, and provides no caching of +These both implement the `GenericTransformer` protocol, and can be +wrapped around a transformation function like `rewrite` to provide a +function `Expr -> T`. They also allow us to attach arbitrary +*immutable* state to our visitor by passing a `state` dictionary. This +dictionary can then be inspected by the concrete transformation +function. `make_recursive` is very simple, and provides no caching of intermediate results (so any DAGs that are visited will be viewed as trees). `CachingVisitor` provides the same interface, but maintains a cache of intermediate results, and reuses them if the same expression @@ -313,15 +317,24 @@ expression with appropriate columns renamed. To start, we define the dispatch function ```python +from collections.abc import Mapping +from functools import singledispatch +from cudf_polars.dsl.traversal import ( + CachingVisitor, make_recursive, reuse_if_unchanged +) +from cudf_polars.dsl.expr import Col, Expr +from cudf_polars.typing import ExprTransformer + + @singledispatch -def _rename(e: Expr, rec: Callable[[Expr], Expr]) -> Expr: +def _rename(e: Expr, rec: ExprTransformer) -> Expr: raise NotImplementedError(f"No handler for {type(e)}") ``` then we register specific handlers, first for columns: ```python @_rename.register -def _(e: Col, rec: Callable[[Expr], Expr]) -> Expr: - mapping = rec.mapping # state set on rec +def _(e: Col, rec: ExprTransformer) -> Expr: + mapping = rec.state["mapping"] # state set on rec if e.name in mapping: # If we have a rename, return a new Col reference # with a new name @@ -341,10 +354,9 @@ Finally we tie everything together with a public function: ```python def rename(e: Expr, mapping: Mapping[str, str]) -> Expr: """Rename column references in an expression.""" - mapper = CachingVisitor(_rename) + mapper = CachingVisitor(_rename, state={"mapping": mapping}) # or - # mapper = make_recursive(_rename) - mapper.mapping = mapping + # mapper = make_recursive(_rename, state={"mapping": mapping}) return mapper(e) ```