Skip to content

Commit

Permalink
improve comments
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed May 17, 2024
1 parent 08618e8 commit 8be8b09
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions datafusion-examples/examples/parquet_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ async fn main() -> Result<()> {
.await?
.show()
.await?;
println!("Files pruned: {}", provider.index().last_num_pruned());

// Run a query that uses the index to prune files.
//
Expand Down Expand Up @@ -221,8 +222,12 @@ impl TableProvider for IndexTableProvider {
// that always evaluates to true we can pass to the index
.unwrap_or_else(|| datafusion_physical_expr::expressions::lit(true));

// Use the index to find the files that might have data that matches the predicate.
// Use the index to find the files that might have data that matches the
// predicate. Any file that cannot have data that matches the predicate
// will not be returned.
let files = self.index.get_files(predicate.clone())?;

// Transform to the format needed to pass to ParquetExec
// Create one file group per file (default to scanning them all in parallel)
let file_groups = files
.into_iter()
Expand Down Expand Up @@ -534,8 +539,8 @@ impl ParquetMetadataIndexBuilder {
assert_eq!(value_column_mins.null_count(), 0);
assert_eq!(value_column_maxes.null_count(), 0);

// compute the total row count, and overall min and max of the "value"
// column in this file
// The statistics above are one per row group, so we need to compute the
// overall file row count, and min and max.
let row_count = row_counts
.iter()
.flatten() // skip nulls (should be none)
Expand Down

0 comments on commit 8be8b09

Please sign in to comment.