diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 04383bb51bda..c8399d52c157 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -572,7 +572,15 @@ impl std::fmt::Debug for ParquetRecordBatchStream { } impl ParquetRecordBatchStream { - /// Returns the [`SchemaRef`] for this parquet file + /// Returns the [`SchemaRef`] for this parquet file. + /// + /// Note that unlike its synchronous counterpart [`ParquetRecordBatchReader`], the [`SchemaRef`] + /// returned here will contain the original metadata, whereas [`ParquetRecordBatchReader`] + /// strips this metadata. + /// + /// As such, this schema is the same as [`ParquetRecordBatchStreamBuilder::schema`], which also + /// contains the metadata, but differs from the [`RecordBatch::schema`] of the [`RecordBatch`]es + /// produced, which has the metadata stripped. pub fn schema(&self) -> &SchemaRef { &self.schema }