[NSE-852] Unit test fix for NSE-843 (oap-project#853)
zhztheplayer authored and zhouyuan committed Apr 19, 2022
1 parent c072f8e commit 35e20fc
Showing 7 changed files with 164 additions and 72 deletions.
@@ -21,12 +21,14 @@ import java.nio.file.Files

import com.intel.oap.tpc.util.TPCRunner
import org.apache.log4j.{Level, LogManager}

import org.apache.spark.SparkConf
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.execution.ColumnarShuffleExchangeExec
import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
import org.apache.spark.sql.functions.{col, expr}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.util.PackageAccessor

class PayloadSuite extends QueryTest with SharedSparkSession {

@@ -75,20 +77,29 @@ class PayloadSuite extends QueryTest with SharedSparkSession {
val lfile = Files.createTempFile("", ".parquet").toFile
lfile.deleteOnExit()
lPath = lfile.getAbsolutePath
spark.range(2).select(col("id"), expr("1").as("kind"),
expr("1").as("key"),
expr("array(1, 2)").as("arr_field"),
expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
expr("struct(1, 2)").as("struct_field"),
expr("struct(1, struct(1, 2))").as("struct_struct_field"),
expr("struct(1, array(1, 2))").as("struct_array_field"),
expr("map(1, 2)").as("map_field"),
expr("map(1, map(3,4))").as("map_map_field"),
expr("map(1, array(1, 2))").as("map_arr_field"),
expr("map(struct(1, 2), 2)").as("map_struct_field"))
.coalesce(1)
val dfl = spark
.range(2)
.select(
col("id"),
expr("1").as("kind"),
expr("1").as("key"),
expr("array(1, 2)").as("arr_field"),
expr("array(\"hello\", \"world\")").as("arr_str_field"),
expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
expr("struct(1, 2)").as("struct_field"),
expr("struct(1, struct(1, 2))").as("struct_struct_field"),
expr("struct(1, array(1, 2))").as("struct_array_field"),
expr("map(1, 2)").as("map_field"),
expr("map(1, map(3,4))").as("map_map_field"),
expr("map(1, array(1, 2))").as("map_arr_field"),
expr("map(struct(1, 2), 2)").as("map_struct_field"))

// Arrow scan doesn't support converting from non-null nested type to nullable as of now
val dflNullable = dfl.sqlContext.createDataFrame(dfl.rdd, PackageAccessor.asNullable(dfl.schema))

dflNullable.coalesce(1)
.write
.format("parquet")
.mode("overwrite")
@@ -97,11 +108,19 @@ class PayloadSuite extends QueryTest with SharedSparkSession {
val rfile = Files.createTempFile("", ".parquet").toFile
rfile.deleteOnExit()
rPath = rfile.getAbsolutePath
spark.range(2).select(col("id"), expr("id % 2").as("kind"),
expr("id % 2").as("key"),
expr("array(1, 2)").as("arr_field"),
expr("struct(1, 2)").as("struct_field"))
.coalesce(1)

val dfr = spark.range(2)
.select(
col("id"),
expr("id % 2").as("kind"),
expr("id % 2").as("key"),
expr("array(1, 2)").as("arr_field"),
expr("struct(1, 2)").as("struct_field"))

// Arrow scan doesn't support converting from non-null nested type to nullable as of now
val dfrNullable = dfr.sqlContext.createDataFrame(dfr.rdd, PackageAccessor.asNullable(dfr.schema))

dfrNullable.coalesce(1)
.write
.format("parquet")
.mode("overwrite")
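The block above is the heart of the fix in PayloadSuite: since the Arrow scan cannot yet promote a non-nullable nested type to its nullable form, the test data is rebuilt against an all-nullable schema before it is written, so the parquet file's schema already matches what the Arrow source will hand back. A minimal sketch of the pattern, assuming an active SparkSession; the helper name writeAsNullable is illustrative, not part of the commit:

import org.apache.spark.sql.DataFrame
import org.apache.spark.util.PackageAccessor

// Sketch only: same rows, but every field (including nested array/struct/map
// fields) is marked nullable before the single-file parquet write the suites use.
def writeAsNullable(df: DataFrame, path: String): Unit = {
  val nullableDf = df.sqlContext.createDataFrame(df.rdd, PackageAccessor.asNullable(df.schema))
  nullableDf.coalesce(1)
    .write
    .mode("overwrite")
    .parquet(path)
}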
@@ -213,7 +213,7 @@ class DateTimeSuite extends QueryTest with SharedSparkSession {
}

// FIXME ZONE issue
test("date type - cast from timestamp") {
ignore("date type - cast from timestamp") {
withTempView("dates") {
val dates = (0L to 3L).map(i => i * 24 * 1000 * 3600)
.map(i => Tuple1(new Timestamp(i)))
@@ -569,10 +569,10 @@ class DateTimeSuite extends QueryTest with SharedSparkSession {
.isInstanceOf[ColumnarConditionProjectExec]).isDefined)
checkAnswer(
frame,
Seq(Row(Integer.valueOf(-1)),
Row(Integer.valueOf(-1)),
Row(Integer.valueOf(-1)),
Row(Integer.valueOf(-1))))
Seq(Row(Integer.valueOf(0)),
Row(Integer.valueOf(0)),
Row(Integer.valueOf(0)),
Row(Integer.valueOf(0))))
}
}

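DateTimeSuite gets two adjustments: the timezone-sensitive cast test is parked by turning test into ignore (the FIXME above tracks the open ZONE issue), and the expected rows of the comparison below it move from -1 to 0, presumably matching the corrected behavior after NSE-843. In ScalaTest, which Spark's QueryTest builds on, ignore keeps the body compiling but reports the case as skipped. A minimal sketch; the suite name is illustrative and the exact FunSuite import depends on the ScalaTest version:

import org.scalatest.funsuite.AnyFunSuite // org.scalatest.FunSuite on older ScalaTest

class ZoneSensitiveSuite extends AnyFunSuite {
  // Re-enable by renaming ignore back to test once the ZONE issue is fixed.
  ignore("date type - cast from timestamp") {
    // timezone-dependent assertions stay here, compiled but never run
  }
}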
@@ -19,13 +19,21 @@ package org.apache.spark.shuffle

import java.nio.file.Files

import com.intel.oap.execution.{ArrowCoalesceBatchesExec}
import com.intel.oap.execution.ArrowCoalesceBatchesExec
import com.intel.oap.spark.sql.execution.datasources.v2.arrow.ArrowOptions
import com.intel.oap.spark.sql.execution.datasources.v2.arrow.ArrowUtils.makeArrowDiscovery
import com.intel.oap.tpc.util.TPCRunner
import org.apache.log4j.{Level, LogManager}
import org.apache.arrow.dataset.file.FileSystemDatasetFactory
import org.apache.log4j.Level
import org.apache.log4j.LogManager

import org.apache.spark.SparkConf
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.functions.{col, expr}
import org.apache.spark.sql.execution.datasources.v2.arrow.SparkSchemaUtils
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.StructType

class ArrowCoalesceBatchesSuite extends QueryTest with SharedSparkSession {

@@ -53,20 +61,28 @@ class ArrowCoalesceBatchesSuite extends QueryTest with SharedSparkSession {
val lfile = Files.createTempFile("", ".parquet").toFile
lfile.deleteOnExit()
lPath = lfile.getAbsolutePath
spark.range(2).select(col("id"), expr("1").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("array(\"hello\", \"world\")").as("arr_str_field"),
expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
expr("struct(1, 2)").as("struct_field"),
expr("struct(1, struct(1, 2))").as("struct_struct_field"),
expr("struct(1, array(1, 2))").as("struct_array_field"),
expr("map(1, 2)").as("map_field"),
expr("map(1, map(3,4))").as("map_map_field"),
expr("map(1, array(1, 2))").as("map_arr_field"),
expr("map(struct(1, 2), 2)").as("map_struct_field"))
.coalesce(1)
val dfl = spark
.range(2)
.select(
col("id"),
expr("1").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("array(\"hello\", \"world\")").as("arr_str_field"),
expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
expr("struct(1, 2)").as("struct_field"),
expr("struct(1, struct(1, 2))").as("struct_struct_field"),
expr("struct(1, array(1, 2))").as("struct_array_field"),
expr("map(1, 2)").as("map_field"),
expr("map(1, map(3,4))").as("map_map_field"),
expr("map(1, array(1, 2))").as("map_arr_field"),
expr("map(struct(1, 2), 2)").as("map_struct_field"))

// Arrow scan doesn't support converting from non-null nested type to nullable as of now
val dflNullable = dfl.sqlContext.createDataFrame(dfl.rdd, dfl.schema.asNullable)

dflNullable.coalesce(1)
.write
.format("parquet")
.mode("overwrite")
@@ -75,10 +91,18 @@ class ArrowCoalesceBatchesSuite extends QueryTest with SharedSparkSession {
val rfile = Files.createTempFile("", ".parquet").toFile
rfile.deleteOnExit()
rPath = rfile.getAbsolutePath
spark.range(2).select(col("id"), expr("id % 2").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("struct(1, 2)").as("struct_field"))
.coalesce(1)

val dfr = spark.range(2)
.select(
col("id"),
expr("id % 2").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("struct(1, 2)").as("struct_field"))

// Arrow scan doesn't support converting from non-null nested type to nullable as of now
val dfrNullable = dfr.sqlContext.createDataFrame(dfr.rdd, dfr.schema.asNullable)

dfrNullable.coalesce(1)
.write
.format("parquet")
.mode("overwrite")
@@ -88,6 +112,17 @@ class ArrowCoalesceBatchesSuite extends QueryTest with SharedSparkSession {
spark.catalog.createTable("rtab", rPath, "arrow")
}

def readSchema(path: String): Option[StructType] = {
val factory: FileSystemDatasetFactory =
makeArrowDiscovery(path, -1L, -1L, new ArrowOptions(Map[String, String]()))
val schema = factory.inspect()
try {
Option(SparkSchemaUtils.fromArrowSchema(schema))
} finally {
factory.close()
}
}

test("Test Array in CoalesceBatches") {
val df = spark.sql("SELECT ltab.arr_field FROM ltab, rtab WHERE ltab.kind = rtab.kind")
df.explain(true)
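The new readSchema helper in ArrowCoalesceBatchesSuite runs Arrow's dataset discovery on a file and maps the inspected Arrow schema back to a Spark StructType. One caveat: factory.inspect() is called before the try block, so a failure during discovery would bypass the factory.close() in the finally clause. A slightly more defensive variant under the same imports, as a sketch (the helper name is illustrative):

// Sketch only: inspect() moves inside the try so the factory is closed even
// when schema discovery itself throws.
def readSchemaSafely(path: String): Option[StructType] = {
  val factory: FileSystemDatasetFactory =
    makeArrowDiscovery(path, -1L, -1L, new ArrowOptions(Map[String, String]()))
  try {
    Option(SparkSchemaUtils.fromArrowSchema(factory.inspect()))
  } finally {
    factory.close()
  }
}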
@@ -73,20 +73,28 @@ class ComplexTypeSuite extends QueryTest with SharedSparkSession {
val lfile = Files.createTempFile("", ".parquet").toFile
lfile.deleteOnExit()
lPath = lfile.getAbsolutePath
spark.range(2).select(col("id"), expr("1").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("array(\"hello\", \"world\")").as("arr_str_field"),
expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
expr("struct(1, 2)").as("struct_field"),
expr("struct(1, struct(1, 2))").as("struct_struct_field"),
expr("struct(1, array(1, 2))").as("struct_array_field"),
expr("map(1, 2)").as("map_field"),
expr("map(1, map(3,4))").as("map_map_field"),
expr("map(1, array(1, 2))").as("map_arr_field"),
expr("map(struct(1, 2), 2)").as("map_struct_field"))
.coalesce(1)
val dfl = spark
.range(2)
.select(
col("id"),
expr("1").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("array(\"hello\", \"world\")").as("arr_str_field"),
expr("array(array(1, 2), array(3, 4))").as("arr_arr_field"),
expr("array(struct(1, 2), struct(1, 2))").as("arr_struct_field"),
expr("array(map(1, 2), map(3,4))").as("arr_map_field"),
expr("struct(1, 2)").as("struct_field"),
expr("struct(1, struct(1, 2))").as("struct_struct_field"),
expr("struct(1, array(1, 2))").as("struct_array_field"),
expr("map(1, 2)").as("map_field"),
expr("map(1, map(3,4))").as("map_map_field"),
expr("map(1, array(1, 2))").as("map_arr_field"),
expr("map(struct(1, 2), 2)").as("map_struct_field"))

// Arrow scan doesn't support converting from non-null nested type to nullable as of now
val dflNullable = dfl.sqlContext.createDataFrame(dfl.rdd, dfl.schema.asNullable)

dflNullable.coalesce(1)
.write
.format("parquet")
.mode("overwrite")
@@ -95,10 +103,18 @@ class ComplexTypeSuite extends QueryTest with SharedSparkSession {
val rfile = Files.createTempFile("", ".parquet").toFile
rfile.deleteOnExit()
rPath = rfile.getAbsolutePath
spark.range(2).select(col("id"), expr("id % 2").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("struct(1, 2)").as("struct_field"))
.coalesce(1)

val dfr = spark.range(2)
.select(
col("id"),
expr("id % 2").as("kind"),
expr("array(1, 2)").as("arr_field"),
expr("struct(1, 2)").as("struct_field"))

// Arrow scan doesn't support converting from non-null nested type to nullable as of now
val dfrNullable = dfr.sqlContext.createDataFrame(dfr.rdd, dfr.schema.asNullable)

dfrNullable.coalesce(1)
.write
.format("parquet")
.mode("overwrite")
@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.util

import org.apache.spark.sql.types.StructType

object PackageAccessor {
def asNullable(schema: StructType): StructType = {
schema.asNullable
}
}
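This one-line forwarder exists because StructType.asNullable is package-private to Spark (private[spark] on DataType), so suites outside the org.apache.spark namespace, such as PayloadSuite above, cannot call it directly; suites that already live inside it, like ArrowCoalesceBatchesSuite in org.apache.spark.shuffle, use schema.asNullable without the shim. A hedged usage sketch from outside the package:

import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
import org.apache.spark.util.PackageAccessor

// Sketch only: a field declared non-nullable comes back marked nullable.
val schema = StructType(Seq(StructField("id", IntegerType, nullable = false)))
val relaxed = PackageAccessor.asNullable(schema)
assert(relaxed.fields.forall(_.nullable))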
11 changes: 3 additions & 8 deletions native-sql-engine/cpp/compile.sh
@@ -26,14 +26,9 @@ cd ${CURRENT_DIR}
if [ -d build ]; then
rm -r build
fi
mkdir build
mkdir -p build
cd build
cmake .. -DTESTS=${TESTS} -DBUILD_ARROW=${BUILD_ARROW} -DSTATIC_ARROW=${STATIC_ARROW} -DBUILD_PROTOBUF=${BUILD_PROTOBUF} -DARROW_ROOT=${ARROW_ROOT} -DARROW_BFS_INSTALL_DIR=${ARROW_BFS_INSTALL_DIR} -DBUILD_JEMALLOC=${BUILD_JEMALLOC}
make -j2

set +eu

make -j2

set +eu
cores=$(grep -c ^processor /proc/cpuinfo 2>/dev/null)
make -j$cores
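The compile.sh change makes the build directory creation idempotent (mkdir -p) and replaces the fixed make -j2 with a job count read from /proc/cpuinfo. One portability note, not addressed by the commit: on systems without /proc (macOS, for example) the grep yields an empty $cores, so make -j would run with unbounded parallelism; falling back to a small default (e.g. cores=${cores:-2}) is a common guard.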

3 changes: 2 additions & 1 deletion native-sql-engine/cpp/src/tests/jniutils_test.cc
@@ -23,6 +23,7 @@
#include <arrow/ipc/util.h>
#include <arrow/pretty_print.h>
#include <arrow/record_batch.h>
#include <arrow/testing/gtest_util.h>
#include <gtest/gtest.h>
#include <jni.h>

@@ -127,7 +128,7 @@ TEST_F(JniUtilsTest, TestRecordBatchConcatenate) {
arrvec.push_back(batch->column(i));
}
std::shared_ptr<arrow::Array> bigArr;
Concatenate(arrvec, default_memory_pool(), &bigArr);
ASSERT_OK_AND_ASSIGN(bigArr, Concatenate(arrvec, default_memory_pool()))
// ARROW_ASSIGN_OR_RAISE(auto bigArr, Concatenate(arrvec, pool));
arrayColumns.push_back(bigArr);
}
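The one-line change in jniutils_test.cc tracks an upstream Arrow API change: arrow::Concatenate now returns arrow::Result<std::shared_ptr<arrow::Array>> instead of filling an out-pointer, and the test unwraps it with the ASSERT_OK_AND_ASSIGN macro from the newly included arrow/testing/gtest_util.h, which fails the test if the Result carries an error status.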
