Skip to content

Commit

Permalink
Add substrait.proto convenience module and document it
Browse files Browse the repository at this point in the history
  • Loading branch information
amol- committed Mar 13, 2024
1 parent dc42f18 commit 2c430ef
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 14 deletions.
131 changes: 117 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,118 @@ This project is not an execution engine for Substrait Plans.
This is an experimental package that is still under development.

# Example
At the moment, this project contains only generated Python classes for the Substrait protobuf messages. Let's use an existing Substrait producer, [Ibis](https://ibis-project.org), to provide an example using Python Substrait as the consumer.

## Produce a Substrait Plan
The ``substrait.proto`` module provides access to the classes
that represent a Substrait Plan, allowing you to create new plans.

Here is an example plan equivalent to ``SELECT first_name FROM people``,
where the ``people`` table has ``first_name`` and ``surname`` columns of type ``String``:

```
>>> from substrait import proto
>>> plan = proto.Plan(
... relations=[
... proto.PlanRel(
... root=proto.RelRoot(
... names=["first_name"],
... input=proto.Rel(
... read=proto.ReadRel(
... named_table=proto.ReadRel.NamedTable(names=["people"]),
... base_schema=proto.NamedStruct(
... names=["first_name", "surname"],
... struct=proto.Type.Struct(
... types=[
... proto.Type(string=proto.Type.String(nullability=proto.Type.Nullability.NULLABILITY_REQUIRED)),
... proto.Type(string=proto.Type.String(nullability=proto.Type.Nullability.NULLABILITY_REQUIRED))
... ] # /types
... ) # /struct
... ) # /base_schema
... ) # /read
... ) # /input
... ) # /root
... ) # /PlanRel
... ] # /relations
... )
>>> print(plan)
relations {
root {
input {
read {
base_schema {
names: "first_name"
names: "surname"
struct {
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
}
}
named_table {
names: "people"
}
}
}
names: "first_name"
}
}
>>> serialized_plan = plan.SerializeToString()
>>> serialized_plan
b'\x1aA\x12?\n1\n/\x12#\n\nfirst_name\n\x07surname\x12\x0c\n\x04b\x02\x10\x02\n\x04b\x02\x10\x02:\x08\n\x06people\x12\nfirst_name'
```

## Consume the Substrait Plan
The plan we generated in the previous example
can be loaded back from its binary representation
using the ``Plan.ParseFromString`` method:

```
>>> from substrait.proto import Plan
>>> p = Plan()
>>> p.ParseFromString(serialized_plan)
67
>>> p
relations {
root {
input {
read {
base_schema {
names: "first_name"
names: "surname"
struct {
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
types {
string {
nullability: NULLABILITY_REQUIRED
}
}
}
}
named_table {
names: "people"
}
}
}
names: "first_name"
}
}
```

## Produce a Substrait Plan with Ibis
Let's use an existing Substrait producer, [Ibis](https://ibis-project.org),
to provide an example using Python Substrait as the consumer.

```
In [1]: import ibis
Expand All @@ -54,21 +164,14 @@ In [5]: compiler = SubstraitCompiler()
In [6]: protobuf_msg = compiler.compile(query).SerializeToString()
In [7]: type(protobuf_msg)
Out[7]: bytes
```
## Consume the Substrait Plan using Python Substrait
```
In [8]: import substrait
In [7]: from substrait.proto import Plan
In [9]: from substrait.gen.proto.plan_pb2 import Plan
In [8]: my_plan = Plan()
In [10]: my_plan = Plan()
In [9]: my_plan.ParseFromString(protobuf_msg)
Out[9]: 186
In [11]: my_plan.ParseFromString(protobuf_msg)
Out[11]: 186
In [12]: print(my_plan)
In [10]: print(my_plan)
relations {
root {
input {
Expand Down Expand Up @@ -177,4 +280,4 @@ version {
minor_number: 24
producer: "ibis-substrait"
}
```
```
38 changes: 38 additions & 0 deletions src/substrait/proto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
def _load():
    """Expose the generated Substrait protobuf API from this module.

    Every module found in ``substrait.gen.proto`` is imported and bound
    on this module under a friendly name (its name with ``_pb2``
    removed), and every class visible in it is bound here as well, so
    users do not have to import the autogenerated modules one by one.

    For example::

        substrait.gen.proto.extensions.extensions_pb2.SimpleExtensionDeclaration

    becomes::

        substrait.proto.SimpleExtensionDeclaration
    """
    # Imported locally so these helper modules are not bound as
    # attributes of this module, whose namespace is being populated.
    import importlib
    import inspect
    import pkgutil
    import sys

    from substrait.gen import proto as _proto

    selfmodule = sys.modules[__name__]
    for submodule_info in pkgutil.iter_modules(_proto.__path__):
        submodule_name = submodule_info.name
        attr_name = submodule_name.replace("_pb2", "")
        if submodule_name == "extensions":
            # Extensions are in a submodule: load the generated module
            # inside it, but keep the short "extensions" attribute name.
            submodule_name = "extensions.extensions_pb2"
            attr_name = "extensions"

        submodule = importlib.import_module(f".{submodule_name}", _proto.__name__)
        setattr(selfmodule, attr_name, submodule)

        # getmembers() already yields the member objects, so filter with
        # its predicate instead of looking each name up a second time.
        for membername, member in inspect.getmembers(submodule, inspect.isclass):
            setattr(selfmodule, membername, member)


# Run at import time: _load() binds the generated protobuf modules and
# classes as attributes of this module.
_load()

0 comments on commit 2c430ef

Please sign in to comment.