From cbd16c2d71b5cb871d4d232d6707b88ec0f8a75f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 28 Jun 2024 17:06:01 -0400 Subject: [PATCH] consolidate avro demo --- datafusion-examples/examples/avro_sql.rs | 51 ------------------------ datafusion-examples/examples/sql.rs | 33 +++++++++++++++ 2 files changed, 33 insertions(+), 51 deletions(-) delete mode 100644 datafusion-examples/examples/avro_sql.rs diff --git a/datafusion-examples/examples/avro_sql.rs b/datafusion-examples/examples/avro_sql.rs deleted file mode 100644 index ac1053aa1881..000000000000 --- a/datafusion-examples/examples/avro_sql.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::arrow::util::pretty; - -use datafusion::error::Result; -use datafusion::prelude::*; - -/// This example demonstrates executing a simple query against an Arrow data source (Avro) and -/// fetching results -#[tokio::main] -async fn main() -> Result<()> { - // create local execution context - let ctx = SessionContext::new(); - - let testdata = datafusion::test_util::arrow_test_data(); - - // register avro file with the execution context - let avro_file = &format!("{testdata}/avro/alltypes_plain.avro"); - ctx.register_avro("alltypes_plain", avro_file, AvroReadOptions::default()) - .await?; - - // execute the query - let df = ctx - .sql( - "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \ - FROM alltypes_plain \ - WHERE id > 1 AND tinyint_col < double_col", - ) - .await?; - let results = df.collect().await?; - - // print the results - pretty::print_batches(&results)?; - - Ok(()) -} diff --git a/datafusion-examples/examples/sql.rs b/datafusion-examples/examples/sql.rs index 1c54c8cc6b00..9614b3003556 100644 --- a/datafusion-examples/examples/sql.rs +++ b/datafusion-examples/examples/sql.rs @@ -18,6 +18,7 @@ //! This file contains several examples of how to run SQL queries using DataFusion //! //! * [`parquet_demo`]: run SQL query against a single Parquet file +//! * [`avro_demo`]: run SQL query against a single Avro file //! * [`parquet_multi_files_demo`]: run SQL query against a table backed by multiple Parquet files //! * [`regexp_demo`]: regular expression functions to manipulate strings //! 
* [`to_char_demo`]: to_char function to convert strings to date, time, timestamp and durations
@@ -34,10 +35,11 @@ use datafusion_common::{assert_batches_eq, assert_contains};
 use object_store::local::LocalFileSystem;
 use std::path::Path;
 use std::sync::Arc;
 
 #[tokio::main]
 async fn main() -> Result<()> {
     parquet_demo().await?;
+    avro_demo().await?;
     parquet_multi_files_demo().await?;
     regexp_demo().await?;
     to_char_demo().await?;
@@ -77,6 +79,37 @@ async fn parquet_demo() -> Result<()> {
     Ok(())
 }
 
+/// This example demonstrates executing a simple query against an Arrow data
+/// source (a single Avro file) and fetching results.
+///
+/// The query results are pretty-printed rather than asserted.
+async fn avro_demo() -> Result<()> {
+    // create local execution context
+    let ctx = SessionContext::new();
+
+    let testdata = datafusion::test_util::arrow_test_data();
+
+    // register avro file with the execution context
+    let avro_file = &format!("{testdata}/avro/alltypes_plain.avro");
+    ctx.register_avro("alltypes_plain", avro_file, AvroReadOptions::default())
+        .await?;
+
+    // execute the query
+    let df = ctx
+        .sql(
+            "SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \
+            FROM alltypes_plain \
+            WHERE id > 1 AND tinyint_col < double_col",
+        )
+        .await?;
+    let results = df.collect().await?;
+
+    // print the results (fully-qualified path, as in the removed avro_sql.rs example)
+    datafusion::arrow::util::pretty::print_batches(&results)?;
+
+    Ok(())
+}
+
 /// This example demonstrates executing a simple query against an Arrow data
 /// source (a directory with multiple Parquet files) and fetching results.
 ///