From 4c2aee78a2656af5ec62f04e64f6a587f24154f0 Mon Sep 17 00:00:00 2001
From: Trent Hauck
Date: Sat, 21 Oct 2023 13:10:22 -0700
Subject: [PATCH] docs: better for duckdb

---
 Makefile                     |  4 +++-
 README.md                    | 27 +++++++++++++++++++++++++++
 python/tests/test_session.py |  4 ++--
 src/execution_result.rs      |  2 +-
 4 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index d63d391..0ce78dd 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,9 @@ build:
 	cargo build --release
 	maturin develop --release
 
-test: build
+test:
+	cargo build
+	maturin develop
 	pytest
 
 run-benchmarks:
diff --git a/README.md b/README.md
index ed2f757..d82cc4f 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,32 @@ df.head()
 # └──────────────┴─────────────────┴──────┴───────┴───┴────────────┴────────┴───────┴───────────────────────────────────┘
 ```
 
+## Using DuckDB
+
+biobear can also be used with [duckdb][]: read a file into an Arrow table, then query that table directly from a DuckDB connection.
+
+```python
+import biobear as bb
+import duckdb
+
+session = bb.connect()
+
+session.sql("""
+    CREATE EXTERNAL TABLE gene_annotations STORED AS GFF LOCATION 'python/tests/data/test.gff'
+""")
+
+result = session.sql("""
+    SELECT * FROM gene_annotations
+""")
+
+gff_table_arrow_table = result.to_arrow()
+
+duckdb_conn = duckdb.connect()
+
+result = duckdb_conn.execute('SELECT * FROM gff_table_arrow_table').fetchall()
+print(result)
+```
+
 ## Performance
 
 Please see the [exon][]'s performance metrics for thorough benchmarks, but in short, biobear is generally faster than other Python libraries for reading bioinformatic file formats.
@@ -101,3 +127,4 @@ For example, here's quick benchmarks for reading one FASTA file with 1 million r
 The larger difference multiple files is due to biobear's ability to read multiple files in parallel.
 
 [exon]: https://github.com/wheretrue/exon/tree/main/exon-benchmarks
+[duckdb]: https://duckdb.org/
diff --git a/python/tests/test_session.py b/python/tests/test_session.py
index 6d0f4d0..460270e 100644
--- a/python/tests/test_session.py
+++ b/python/tests/test_session.py
@@ -22,7 +22,7 @@
 DATA = Path(__file__).parent / "data"
 
 
-def test_connect():
+def test_connect_and_to_arrow():
     """Test connecting to a context."""
     session = connect()
 
@@ -32,7 +32,7 @@
     session.sql(query)
 
     query = "SELECT * FROM gff_file"
-    arrow_table = session.sql(query).to_arrow_table()
+    arrow_table = session.sql(query).to_arrow()
 
     assert len(arrow_table) == 2
 
diff --git a/src/execution_result.rs b/src/execution_result.rs
index 271da4f..f64c9ef 100644
--- a/src/execution_result.rs
+++ b/src/execution_result.rs
@@ -50,7 +50,7 @@ impl PyExecutionResult {
     }
 
     /// Convert to Arrow Table
-    fn to_arrow_table(&self, py: Python) -> PyResult<PyObject> {
+    fn to_arrow(&self, py: Python) -> PyResult<PyObject> {
         let batches = self.collect(py)?.to_object(py);
 
         Python::with_gil(|py| {
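
A minimal companion sketch of the workflow the README addition documents (not part of the patch itself). It assumes the `biobear` and `duckdb` Python packages are installed, reuses the test GFF path from the patch, and registers the Arrow table with DuckDB under an explicit view name instead of relying on DuckDB's lookup of local Python variables; the view name `gene_annotations` and variable names are illustrative.

```python
# Sketch: biobear -> Arrow table -> DuckDB (assumes biobear and duckdb are installed).
import biobear as bb
import duckdb

session = bb.connect()

# Expose the GFF file to the biobear session, mirroring the README example.
session.sql("""
    CREATE EXTERNAL TABLE gene_annotations STORED AS GFF LOCATION 'python/tests/data/test.gff'
""")

# Materialize the query result as a pyarrow Table via the renamed to_arrow() method.
annotations = session.sql("SELECT * FROM gene_annotations").to_arrow()

# Explicitly register the Arrow table as a DuckDB view, then query it.
duckdb_conn = duckdb.connect()
duckdb_conn.register("gene_annotations", annotations)
print(duckdb_conn.execute("SELECT COUNT(*) FROM gene_annotations").fetchall())
```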