Skip to content

Commit

Permalink
consolidate avro demo
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Jun 28, 2024
1 parent 6ada9f7 commit cbd16c2
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 51 deletions.
51 changes: 0 additions & 51 deletions datafusion-examples/examples/avro_sql.rs

This file was deleted.

33 changes: 33 additions & 0 deletions datafusion-examples/examples/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! This file contains several examples of how to run SQL queries using DataFusion
//!
//! * [`parquet_demo`]: run SQL query against a single Parquet file
//! * [`avro_demo`]: run SQL query against a single Avro file
//! * [`parquet_multi_files_demo`]: run SQL query against a table backed by multiple Parquet files
//! * [`regexp_demo`]: regular expression functions to manipulate strings
//! * [`to_char_demo`]: to_char function to convert dates, times, timestamps and durations to strings
Expand All @@ -34,10 +35,12 @@ use datafusion_common::{assert_batches_eq, assert_contains};
use object_store::local::LocalFileSystem;
use std::path::Path;
use std::sync::Arc;
use arrow::util::pretty;

#[tokio::main]
async fn main() -> Result<()> {
parquet_demo().await?;
avro_demo().await?;
parquet_multi_files_demo().await?;
regexp_demo().await?;
to_char_demo().await?;
Expand Down Expand Up @@ -77,6 +80,36 @@ async fn parquet_demo() -> Result<()> {
Ok(())
}

/// Runs a SQL query against a single Avro file and prints the result.
///
/// The file `avro/alltypes_plain.avro` from the Arrow test data directory is
/// registered under the table name `alltypes_plain`, queried through the SQL
/// interface, and the collected batches are pretty-printed.
///
/// # Errors
///
/// Propagates any error from registering the file, planning or executing the
/// query, or printing the batches.
async fn avro_demo() -> Result<()> {
    // local session that will own the table registration
    let session = SessionContext::new();

    // path of the Avro file inside the arrow test data directory
    let data_dir = datafusion::test_util::arrow_test_data();
    let avro_path = format!("{data_dir}/avro/alltypes_plain.avro");

    // expose the Avro file to SQL as the table `alltypes_plain`
    session
        .register_avro("alltypes_plain", &avro_path, AvroReadOptions::default())
        .await?;

    // plan the query, then execute it and gather the resulting batches
    let query = "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \
                 FROM alltypes_plain \
                 WHERE id > 1 AND tinyint_col < double_col";
    let batches = session.sql(query).await?.collect().await?;

    // print the results
    pretty::print_batches(&batches)?;

    Ok(())
}


/// This example demonstrates executing a simple query against an Arrow data
/// source (a directory with multiple Parquet files) and fetching results.
///
Expand Down

0 comments on commit cbd16c2

Please sign in to comment.