Skip to content

Commit

Permalink
fix: logical vs physical plan mismatch (#162)
Browse files Browse the repository at this point in the history
* fix: logical vs physical plan mismatch
* test: add test file
  • Loading branch information
tshauck authored Jul 26, 2024
1 parent 1b6771b commit c941566
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
Binary file not shown.
11 changes: 11 additions & 0 deletions python/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,17 @@ def test_bed_four():
assert result.to_polars().shape == (10, 4)


def test_cripri_example():
session = new_session()

fasta_file = DATA / "example_crispri_v2_sample.fastq.gz"
result = session.sql(
f"SELECT name, COUNT(*) FROM fastq_scan('{fasta_file}') GROUP BY name"
).to_polars()

assert len(result) == 25000


def test_sdf_file():
session = new_session()

Expand Down
10 changes: 7 additions & 3 deletions src/execution_result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ impl ExecutionResult {
}

/// Returns the schema from the logical plan
///
/// Note: This is a logical schema and may not match the physical schema
fn schema(&self) -> PyArrowType<Schema> {
PyArrowType(self.df.schema().into())
}
Expand Down Expand Up @@ -94,7 +96,8 @@ impl ExecutionResult {
/// Convert to Arrow Table
fn to_arrow(&self, py: Python) -> PyResult<PyObject> {
let batches = self.collect(py)?.to_object(py);
let schema = self.schema().into_py(py);

let schema = None::<PyArrowType<Schema>>.into_py(py);

// Instantiate pyarrow Table object and use its from_batches method
let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
Expand All @@ -109,6 +112,8 @@ impl ExecutionResult {
let stream = wait_for_future(py, self.df.as_ref().clone().execute_stream())
.map_err(error::BioBearError::from)?;

let schema = stream.schema().to_pyarrow(py)?;

let runtime = Arc::new(Runtime::new()?);

let dataframe_record_batch_stream = DataFrameRecordBatchStream::new(stream, runtime);
Expand All @@ -120,10 +125,9 @@ impl ExecutionResult {

let batches = batches.into_pyarrow(py)?;

let schema = self.schema().into_py(py);

let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
let args = (batches, schema);

let table: PyObject = table_class.call_method1("from_batches", args)?.into();

let module = py.import_bound("polars")?;
Expand Down

0 comments on commit c941566

Please sign in to comment.