diff --git a/python/tests/data/example_crispri_v2_sample.fastq.gz b/python/tests/data/example_crispri_v2_sample.fastq.gz new file mode 100644 index 0000000..0ee084d Binary files /dev/null and b/python/tests/data/example_crispri_v2_sample.fastq.gz differ diff --git a/python/tests/test_session.py b/python/tests/test_session.py index 6cd9007..cb2fd69 100644 --- a/python/tests/test_session.py +++ b/python/tests/test_session.py @@ -744,6 +744,17 @@ def test_bed_four(): assert result.to_polars().shape == (10, 4) +def test_cripri_example(): + session = new_session() + + fasta_file = DATA / "example_crispri_v2_sample.fastq.gz" + result = session.sql( + f"SELECT name, COUNT(*) FROM fastq_scan('{fasta_file}') GROUP BY name" + ).to_polars() + + assert len(result) == 25000 + + def test_sdf_file(): session = new_session() diff --git a/src/execution_result.rs b/src/execution_result.rs index fce08f3..62445b0 100644 --- a/src/execution_result.rs +++ b/src/execution_result.rs @@ -55,6 +55,8 @@ impl ExecutionResult { } /// Returns the schema from the logical plan + /// + /// Note: This is a logical schema and may not match the physical schema fn schema(&self) -> PyArrowType { PyArrowType(self.df.schema().into()) } @@ -94,7 +96,8 @@ impl ExecutionResult { /// Convert to Arrow Table fn to_arrow(&self, py: Python) -> PyResult { let batches = self.collect(py)?.to_object(py); - let schema = self.schema().into_py(py); + + let schema = None::>.into_py(py); // Instantiate pyarrow Table object and use its from_batches method let table_class = py.import_bound("pyarrow")?.getattr("Table")?; @@ -109,6 +112,8 @@ impl ExecutionResult { let stream = wait_for_future(py, self.df.as_ref().clone().execute_stream()) .map_err(error::BioBearError::from)?; + let schema = stream.schema().to_pyarrow(py)?; + let runtime = Arc::new(Runtime::new()?); let dataframe_record_batch_stream = DataFrameRecordBatchStream::new(stream, runtime); @@ -120,10 +125,9 @@ impl ExecutionResult { let batches = batches.into_pyarrow(py)?; - let schema = self.schema().into_py(py); - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; let args = (batches, schema); + let table: PyObject = table_class.call_method1("from_batches", args)?.into(); let module = py.import_bound("polars")?;