apache · Fokko · Feb 6, 2024 · Feb 2, 2024 · Feb 2, 2024 · Feb 2, 2024
diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md
@@ -636,3 +636,56 @@ print(ray_dataset.take(2))
     },
 ]
 ```
+
+### Daft
+
+PyIceberg interfaces closely with Daft Dataframes (see also: [Daft integration with Iceberg](https://www.getdaft.io/projects/docs/en/latest/user_guide/integrations/iceberg.html)), which provide a full lazily optimized query engine interface on top of PyIceberg tables.
+
+<!-- prettier-ignore-start -->
+
+!!! note "Requirements"
+    This requires [Daft to be installed](index.md).
+
+<!-- prettier-ignore-end -->
+
+A table can be read easily into a Daft Dataframe:
+
+```python
+df = table.to_daft()  # equivalent to `daft.read_iceberg(table)`
+df = df.where(df["trip_distance"] >= 10.0)
+df = df.select("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime")
+```
+
+This returns a Daft Dataframe which is lazily materialized. Printing `df` will display the schema:
+
+```
+╭──────────┬───────────────────────────────┬───────────────────────────────╮
+│ VendorID ┆ tpep_pickup_datetime          ┆ tpep_dropoff_datetime         │
+│ ---      ┆ ---                           ┆ ---                           │
+│ Int64    ┆ Timestamp(Microseconds, None) ┆ Timestamp(Microseconds, None) │
+╰──────────┴───────────────────────────────┴───────────────────────────────╯
+
+(No data to display: Dataframe not materialized)
+```
+
+We can execute the Dataframe to preview the first few rows of the query with `df.show()`.
+
+This is correctly optimized to take advantage of Iceberg features such as hidden partitioning and file-level statistics for efficient reads.
+
+```python
+df.show(2)
+```
+
+```
+╭──────────┬───────────────────────────────┬───────────────────────────────╮
+│ VendorID ┆ tpep_pickup_datetime          ┆ tpep_dropoff_datetime         │
+│ ---      ┆ ---                           ┆ ---                           │
+│ Int64    ┆ Timestamp(Microseconds, None) ┆ Timestamp(Microseconds, None) │
+╞══════════╪═══════════════════════════════╪═══════════════════════════════╡
+│ 2        ┆ 2008-12-31T23:23:50.000000    ┆ 2009-01-01T00:34:31.000000    │
+├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ 2        ┆ 2008-12-31T23:05:03.000000    ┆ 2009-01-01T16:10:18.000000    │
+╰──────────┴───────────────────────────────┴───────────────────────────────╯
+
+(Showing first 2 rows)
+```
diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md
@@ -51,6 +51,7 @@ You can mix and match optional dependencies depending on your needs:
 | pandas       | Installs both PyArrow and Pandas                                     |
 | duckdb       | Installs both PyArrow and DuckDB                                     |
 | ray          | Installs PyArrow, Pandas, and Ray                                    |
+| daft         | Installs Daft                                                        |
 | s3fs         | S3FS as a FileIO implementation to interact with the object store    |
 | adlfs        | ADLFS as a FileIO implementation to interact with the object store   |
 | snappy       | Support for snappy Avro compression                                  |

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
@@ -120,6 +120,7 @@
 from pyiceberg.utils.datetime import datetime_to_millis
 
 if TYPE_CHECKING:
+    import daft
     import pandas as pd
     import pyarrow as pa
     import ray
@@ -1380,6 +1381,16 @@ def to_ray(self) -> ray.data.dataset.Dataset:
         import ray
 
         return ray.data.from_arrow(self.to_arrow())
+
+    def to_daft(self) -> daft.DataFrame:
+        """Read this Iceberg table lazily into a Daft DataFrame.
+
+        Returns:
+            daft.DataFrame: Unmaterialized Daft DataFrame created from the Iceberg table.
+        """
+        import daft  # imported here because the module-level import is TYPE_CHECKING-only
+
+        return daft.read_iceberg(self)
 
 
 class MoveOperation(Enum):

diff --git a/pyproject.toml b/pyproject.toml
@@ -105,6 +105,7 @@ pyarrow = ["pyarrow"]
 pandas = ["pandas", "pyarrow"]
 duckdb = ["duckdb", "pyarrow"]
 ray = ["ray", "pyarrow", "pandas"]
+daft = ["getdaft>=0.2.12"]
 snappy = ["python-snappy"]
 hive = ["thrift"]
 s3fs = ["s3fs"]
@@ -263,6 +264,10 @@ ignore_missing_imports = true
 module = "ray.*"
 ignore_missing_imports = true
 
+[[tool.mypy.overrides]]
+module = "daft.*"
+ignore_missing_imports = true
+
 [[tool.mypy.overrides]]
 module = "pyparsing.*"
 ignore_missing_imports = true