Commit
Merge pull request apache#14 from bzhang02/avro-example
[YSPARK-1522] Add an Avro example
Showing 11 changed files with 48 additions and 0 deletions.
10 binary files not shown.
src/main/scala/com/yahoo/spark/starter/SparkAvroExample.scala (48 additions, 0 deletions)
package com.yahoo.spark.starter

import org.apache.spark.sql.{SparkSession, DataFrame}
import org.apache.spark.sql.avro._

object SparkAvroExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("Spark Avro Example")
      .getOrCreate()

    val inputDir = "avro_test/resources/"
    val outputDir = "avro_test/output/"

    // Read, query and write for a normal data frame
    val usersDF = spark.read.format("avro").load(inputDir + "users.avro")
    usersDF.show()

    usersDF.select("name", "favorite_color").write.format("avro").save(outputDir + "namesAndFavColors.avro")

    val namesAndFavColorsDF = spark.read.format("avro").load(outputDir + "namesAndFavColors.avro")
    namesAndFavColorsDF.show()

    // Read, query and write for primitive types: load a single-column Avro file,
    // round-trip it through the output directory, and return the re-read frame.
    def readAndWritePrimitive(filename: String): DataFrame = {
      val df = spark.read.format("avro").load(inputDir + filename)
      df.show()
      df.write.format("avro").save(outputDir + filename)

      val df2 = spark.read.format("avro").load(outputDir + filename)
      df2.show()

      df2
    }

    println(readAndWritePrimitive("randomBoolean.avro").head().getBoolean(0))
    println(readAndWritePrimitive("randomBytes.avro").head().getAs[Array[Byte]](0))
    println(readAndWritePrimitive("randomDouble.avro").head().getDouble(0))
    println(readAndWritePrimitive("randomFloat.avro").head().getFloat(0))
    println(readAndWritePrimitive("randomInt.avro").head().getInt(0))
    println(readAndWritePrimitive("randomLong.avro").head().getLong(0))
    println(readAndWritePrimitive("randomString.avro").head().getString(0))
    println(readAndWritePrimitive("randomLongMap.avro").head().getAs[Map[String, Long]](0))
    println(readAndWritePrimitive("randomStringArray.avro").head().getAs[Array[String]](0))

    // Release cluster resources once the example has run.
    spark.stop()
  }
}
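The example expects pre-generated Avro files under avro_test/resources/ (the binary files in this commit). As context, here is a minimal sketch, not part of the commit, of how such inputs could be produced with Spark itself. The users.avro column names ("name", "favorite_color") are inferred from the select above; the exact rows and the single-column layout of the primitive files are assumptions.

package com.yahoo.spark.starter

import org.apache.spark.sql.SparkSession

// Hypothetical helper for generating the test inputs read by SparkAvroExample.
object GenerateAvroTestData {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("Generate Avro Test Data")
      .getOrCreate()
    import spark.implicits._

    // Assumed users file with the two columns queried by SparkAvroExample.
    Seq(("Alyssa", Option.empty[String]), ("Ben", Some("red")))
      .toDF("name", "favorite_color")
      .write.format("avro").save("avro_test/resources/users.avro")

    // One of the assumed primitive-type files consumed by readAndWritePrimitive.
    Seq(42L).toDF("value")
      .write.format("avro").save("avro_test/resources/randomLong.avro")

    spark.stop()
  }
}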