From edda56c697b1c848daa658b5660bf0d8199c855c Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 9 Oct 2024 11:21:21 +0000
Subject: [PATCH] Remove superclass init calls

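Drop `__init__` from the `Expr` and `IR` base classes and have each
concrete subclass assign `self.dtype` / `self.schema` directly instead
of calling `super().__init__`. `Len`, which previously relied on the
inherited `Expr` constructor, gains its own `__init__`.

This is marginally faster, and makes it clearer that the base classes
are abstract.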
---
 python/cudf_polars/cudf_polars/dsl/expr.py | 39 +++++------
 python/cudf_polars/cudf_polars/dsl/ir.py   | 80 ++++++++++------------
 2 files changed, 55 insertions(+), 64 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index b4434de8c5d..7099a781e4b 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -79,10 +79,6 @@ class Expr(Node):
     """Data type of the expression."""
     children: tuple[Expr, ...] = ()
 
-    # Constructor must take arguments in order (*_non_child, *children)
-    def __init__(self, dtype: plc.DataType) -> None:
-        self.dtype = dtype
-
     def do_evaluate(
         self,
         df: DataFrame,
@@ -271,7 +267,7 @@ class Literal(Expr):
     children: tuple[()]
 
     def __init__(self, dtype: plc.DataType, value: pa.Scalar[Any]) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         assert value.type == plc.interop.to_arrow(dtype)
         self.value = value
 
@@ -298,7 +294,7 @@ class LiteralColumn(Expr):
     children: tuple[()]
 
     def __init__(self, dtype: plc.DataType, value: pl.Series) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         data = value.to_arrow()
         self.value = data.cast(dtypes.downcast_arrow_lists(data.type))
 
@@ -355,6 +351,9 @@ def collect_agg(self, *, depth: int) -> AggInfo:
 class Len(Expr):
     children: tuple[()]
 
+    def __init__(self, dtype: plc.DataType) -> None:
+        self.dtype = dtype
+
     def do_evaluate(
         self,
         df: DataFrame,
@@ -392,7 +391,7 @@ def __init__(
         options: tuple[Any, ...],
         *children: Expr,
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.options = options
         self.name = name
         self.children = children
@@ -631,7 +630,7 @@ def __init__(
         options: tuple[Any, ...],
         *children: Expr,
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.options = options
         self.name = name
         self.children = children
@@ -887,7 +886,7 @@ def __init__(
         options: tuple[Any, ...],
         *children: Expr,
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.options = options
         self.name = name
         self.children = children
@@ -992,7 +991,7 @@ class UnaryFunction(Expr):
     def __init__(
         self, dtype: plc.DataType, name: str, options: tuple[Any, ...], *children: Expr
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.name = name
         self.options = options
         self.children = children
@@ -1231,7 +1230,7 @@ class Sort(Expr):
     def __init__(
         self, dtype: plc.DataType, options: tuple[bool, bool, bool], column: Expr
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.options = options
         self.children = (column,)
 
@@ -1271,7 +1270,7 @@ def __init__(
         column: Expr,
         *by: Expr,
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.options = options
         self.children = (column, *by)
 
@@ -1304,7 +1303,7 @@ class Gather(Expr):
     children: tuple[Expr, Expr]
 
     def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.children = (values, indices)
 
     def do_evaluate(
@@ -1346,7 +1345,7 @@ class Filter(Expr):
     children: tuple[Expr, Expr]
 
     def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr):
-        super().__init__(dtype)
+        self.dtype = dtype
         self.children = (values, indices)
 
     def do_evaluate(
@@ -1373,7 +1372,7 @@ class RollingWindow(Expr):
     children: tuple[Expr]
 
     def __init__(self, dtype: plc.DataType, options: Any, agg: Expr) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.options = options
         self.children = (agg,)
         raise NotImplementedError("Rolling window not implemented")
@@ -1385,7 +1384,7 @@ class GroupedRollingWindow(Expr):
     children: tuple[Expr, ...]
 
     def __init__(self, dtype: plc.DataType, options: Any, agg: Expr, *by: Expr) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.options = options
         self.children = (agg, *by)
         raise NotImplementedError("Grouped rolling window not implemented")
@@ -1397,7 +1396,7 @@ class Cast(Expr):
     children: tuple[Expr]
 
     def __init__(self, dtype: plc.DataType, value: Expr) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.children = (value,)
         if not dtypes.can_cast(value.dtype, self.dtype):
             raise NotImplementedError(
@@ -1431,7 +1430,7 @@ class Agg(Expr):
     def __init__(
         self, dtype: plc.DataType, name: str, options: Any, *children: Expr
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.name = name
         self.options = options
         self.children = children
@@ -1631,7 +1630,7 @@ class Ternary(Expr):
     def __init__(
         self, dtype: plc.DataType, when: Expr, then: Expr, otherwise: Expr
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         self.children = (when, then, otherwise)
 
     def do_evaluate(
@@ -1663,7 +1662,7 @@ def __init__(
         left: Expr,
         right: Expr,
     ) -> None:
-        super().__init__(dtype)
+        self.dtype = dtype
         if plc.traits.is_boolean(self.dtype):
             # For boolean output types, bitand and bitor implement
             # boolean logic, so translate. bitxor also does, but the
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index b2edf0084bc..ee47f154a35 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -127,12 +127,9 @@ class IR(Node):
 
     __slots__ = ("schema",)
     _non_child: ClassVar[tuple[str, ...]] = ("schema",)
-    children: tuple[IR, ...] = ()
     schema: Schema
     """Mapping from column names to their data types."""
-
-    def __init__(self, schema: Schema) -> None:
-        self.schema = schema
+    children: tuple[IR, ...] = ()
 
     def get_hash(self) -> int:
         """Hash of node, treating schema dictionary."""
@@ -179,7 +176,7 @@ class PythonScan(IR):
     """Filter to apply to the constructed dataframe before returning it."""
 
     def __init__(self, schema: Schema, options: Any, predicate: expr.NamedExpr | None):
-        super().__init__(schema)
+        self.schema = schema
         self.options = options
         self.predicate = predicate
         raise NotImplementedError("PythonScan not implemented")
@@ -230,23 +227,6 @@ class Scan(IR):
     predicate: expr.NamedExpr | None
     """Mask to apply to the read dataframe."""
 
-    def get_hash(self) -> int:
-        """Hash of the node."""
-        return hash(
-            (
-                type(self),
-                self.typ,
-                json.dumps(self.reader_options),
-                json.dumps(self.cloud_options),
-                tuple(self.paths),
-                tuple(self.with_columns) if self.with_columns is not None else None,
-                self.skip_rows,
-                self.n_rows,
-                self.row_index,
-                self.predicate,
-            )
-        )
-
     def __init__(
         self,
         schema: Schema,
@@ -260,7 +240,7 @@ def __init__(
         row_index: tuple[str, int] | None,
         predicate: expr.NamedExpr | None,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.typ = typ
         self.reader_options = reader_options
         self.cloud_options = cloud_options
@@ -329,6 +309,23 @@ def __init__(
                 "Reading only parquet metadata to produce row index."
             )
 
+    def get_hash(self) -> int:
+        """Hash of the node."""
+        return hash(
+            (
+                type(self),
+                self.typ,
+                json.dumps(self.reader_options),
+                json.dumps(self.cloud_options),
+                tuple(self.paths),
+                tuple(self.with_columns) if self.with_columns is not None else None,
+                self.skip_rows,
+                self.n_rows,
+                self.row_index,
+                self.predicate,
+            )
+        )
+
     def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         """Evaluate and return a dataframe."""
         with_columns = self.with_columns
@@ -482,14 +479,13 @@ class Cache(IR):
     __slots__ = ("key", "children")
     _non_child = ("schema", "key")
     children: tuple[IR]
-
     key: int
     """The cache key."""
     value: IR
     """The unevaluated node to cache."""
 
     def __init__(self, schema: Schema, key: int, value: IR):
-        super().__init__(schema)
+        self.schema = schema
         self.key = key
         self.children = (value,)
 
@@ -511,7 +507,6 @@ class DataFrameScan(IR):
 
     __slots__ = ("df", "projection", "predicate")
     _non_child = ("schema", "df", "projection", "predicate")
-
     df: Any
     """Polars LazyFrame object."""
     projection: tuple[str, ...] | None
@@ -526,7 +521,7 @@ def __init__(
         projection: Sequence[str] | None,
         predicate: expr.NamedExpr | None,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.df = df
         self.projection = tuple(projection) if projection is not None else None
         self.predicate = predicate
@@ -562,7 +557,6 @@ class Select(IR):
     __slots__ = ("exprs", "children", "should_broadcast")
     _non_child = ("schema", "exprs", "should_broadcast")
     children: tuple[IR]
-
     df: IR
     """Input dataframe."""
     exprs: tuple[expr.NamedExpr, ...]
@@ -577,7 +571,7 @@ def __init__(
         should_broadcast: bool,  # noqa: FBT001
         df: IR,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.exprs = tuple(exprs)
         self.should_broadcast = should_broadcast
         self.children = (df,)
@@ -611,7 +605,7 @@ class Reduce(IR):
     def __init__(
         self, schema: Schema, exprs: Sequence[expr.NamedExpr], df: IR
     ):  # pragma: no cover; polars doesn't emit this node yet
-        super().__init__(schema)
+        self.schema = schema
         self.exprs = tuple(exprs)
         self.children = (df,)
 
@@ -649,7 +643,7 @@ def __init__(
         options: Any,
         df: IR,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.keys = tuple(keys)
         self.agg_requests = tuple(agg_requests)
         self.maintain_order = maintain_order
@@ -819,7 +813,7 @@ def __init__(
         left: IR,
         right: IR,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.left_on = tuple(left_on)
         self.right_on = tuple(right_on)
         self.options = options
@@ -1026,7 +1020,7 @@ def __init__(
         should_broadcast: bool,  # noqa: FBT001
         df: IR,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.columns = tuple(columns)
         self.should_broadcast = should_broadcast
         self.children = (df,)
@@ -1066,7 +1060,7 @@ def __init__(
         stable: bool,  # noqa: FBT001
         df: IR,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.keep = keep
         self.subset = subset
         self.zlice = zlice
@@ -1139,7 +1133,7 @@ def __init__(
         zlice: tuple[int, int] | None,
         df: IR,
     ):
-        super().__init__(schema)
+        self.schema = schema
         self.by = tuple(by)
         self.order = tuple(order)
         self.null_order = tuple(null_order)
@@ -1189,16 +1183,14 @@ class Slice(IR):
 
     __slots__ = ("offset", "length", "children")
     _non_child = ("schema", "offset", "length")
-
-    df: IR
-    """Input."""
+    children: tuple[IR]
     offset: int
     """Start of the slice."""
     length: int
     """Length of the slice."""
 
     def __init__(self, schema: Schema, offset: int, length: int, df: IR):
-        super().__init__(schema)
+        self.schema = schema
         self.offset = offset
         self.length = length
         self.children = (df,)
@@ -1218,7 +1210,7 @@ class Filter(IR):
     children: tuple[IR]
 
     def __init__(self, schema: Schema, mask: expr.NamedExpr, df: IR):
-        super().__init__(schema)
+        self.schema = schema
         self.mask = mask
         self.children = (df,)
 
@@ -1238,7 +1230,7 @@ class Projection(IR):
     children: tuple[IR]
 
     def __init__(self, schema: Schema, df: IR):
-        super().__init__(schema)
+        self.schema = schema
         self.children = (df,)
 
     def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
@@ -1274,7 +1266,7 @@ class MapFunction(IR):
     )
 
     def __init__(self, schema: Schema, name: str, options: Any, df: IR):
-        super().__init__(schema)
+        self.schema = schema
         self.name = name
         self.options = options
         self.children = (df,)
@@ -1380,7 +1372,7 @@ class Union(IR):
     _non_child = ("schema", "zlice")
 
     def __init__(self, schema: Schema, zlice: tuple[int, int] | None, *children: IR):
-        super().__init__(schema)
+        self.schema = schema
         self.zlice = zlice
         self.children = children
         schema = self.children[0].schema
@@ -1403,7 +1395,7 @@ class HConcat(IR):
     _non_child = ("schema",)
 
     def __init__(self, schema: Schema, *children: IR):
-        super().__init__(schema)
+        self.schema = schema
         self.children = children
 
     @staticmethod