Merge master branch and try to test Hadoop-3.1 on jenkins
wangyum committed Mar 10, 2019
1 parent bf47619 commit 71421d6
Showing 13 changed files with 86 additions and 58 deletions.
60 changes: 33 additions & 27 deletions dev/deps/spark-deps-hadoop-3.1
@@ -15,24 +15,22 @@ arpack_combined_all-0.1.jar
arrow-format-0.12.0.jar
arrow-memory-0.12.0.jar
arrow-vector-0.12.0.jar
audience-annotations-0.5.0.jar
automaton-1.11-8.jar
avro-1.8.2.jar
avro-ipc-1.8.2.jar
avro-mapred-1.8.2-hadoop2.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.12-0.13.2.jar
breeze_2.12-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.10.0.jar
calcite-linq4j-1.10.0.jar
chill-java-0.9.3.jar
chill_2.12-0.9.3.jar
commons-beanutils-1.9.3.jar
commons-cli-1.2.jar
commons-codec-1.10.jar
commons-collections-3.2.2.jar
commons-compiler-3.0.11.jar
commons-compress-1.8.1.jar
commons-compress-1.9.jar
commons-configuration2-2.1.1.jar
commons-crypto-1.0.0.jar
commons-daemon-1.0.13.jar
@@ -45,11 +43,12 @@ commons-logging-1.1.3.jar
commons-math3-3.4.1.jar
commons-net-3.1.jar
commons-pool-1.5.4.jar
commons-text-1.6.jar
compress-lzf-1.0.3.jar
core-1.1.2.jar
curator-client-2.12.0.jar
curator-framework-2.12.0.jar
curator-recipes-2.12.0.jar
curator-client-2.13.0.jar
curator-framework-2.13.0.jar
curator-recipes-2.13.0.jar
datanucleus-api-jdo-3.2.6.jar
datanucleus-core-4.1.17.jar
datanucleus-rdbms-3.2.9.jar
@@ -63,25 +62,30 @@ gson-2.2.4.jar
guava-14.0.1.jar
guice-4.0.jar
guice-servlet-4.0.jar
hadoop-annotations-3.1.0.jar
hadoop-auth-3.1.0.jar
hadoop-client-3.1.0.jar
hadoop-common-3.1.0.jar
hadoop-hdfs-client-3.1.0.jar
hadoop-mapreduce-client-common-3.1.0.jar
hadoop-mapreduce-client-core-3.1.0.jar
hadoop-mapreduce-client-jobclient-3.1.0.jar
hadoop-yarn-api-3.1.0.jar
hadoop-yarn-client-3.1.0.jar
hadoop-yarn-common-3.1.0.jar
hadoop-yarn-registry-3.1.0.jar
hadoop-yarn-server-common-3.1.0.jar
hadoop-yarn-server-web-proxy-3.1.0.jar
hadoop-annotations-3.2.0.jar
hadoop-auth-3.2.0.jar
hadoop-client-3.2.0.jar
hadoop-common-3.2.0.jar
hadoop-hdfs-client-3.2.0.jar
hadoop-mapreduce-client-common-3.2.0.jar
hadoop-mapreduce-client-core-3.2.0.jar
hadoop-mapreduce-client-jobclient-3.2.0.jar
hadoop-yarn-api-3.2.0.jar
hadoop-yarn-client-3.2.0.jar
hadoop-yarn-common-3.2.0.jar
hadoop-yarn-registry-3.2.0.jar
hadoop-yarn-server-common-3.2.0.jar
hadoop-yarn-server-web-proxy-3.2.0.jar
hive-common-2.3.4.jar
hive-llap-client-2.3.4.jar
hive-llap-common-2.3.4.jar
hive-serde-2.3.4.jar
hive-service-rpc-2.3.4.jar
hive-shims-0.23-2.3.4.jar
hive-shims-2.3.4.jar
hive-shims-common-2.3.4.jar
hive-storage-api-2.7.0.jar
hive-shims-scheduler-2.3.4.jar
hive-storage-api-2.6.0.jar
hk2-api-2.4.0-b34.jar
hk2-locator-2.4.0-b34.jar
hk2-utils-2.4.0-b34.jar
@@ -96,8 +100,8 @@ jackson-core-2.9.8.jar
jackson-core-asl-1.9.13.jar
jackson-databind-2.9.8.jar
jackson-dataformat-yaml-2.9.8.jar
jackson-jaxrs-base-2.7.8.jar
jackson-jaxrs-json-provider-2.7.8.jar
jackson-jaxrs-base-2.9.5.jar
jackson-jaxrs-json-provider-2.9.5.jar
jackson-mapper-asl-1.9.13.jar
jackson-module-jaxb-annotations-2.9.8.jar
jackson-module-paranamer-2.9.8.jar
@@ -124,12 +128,13 @@ jersey-container-servlet-core-2.22.2.jar
jersey-guava-2.22.2.jar
jersey-media-jaxb-2.22.2.jar
jersey-server-2.22.2.jar
jetty-webapp-9.4.12.v20180830.jar
jetty-xml-9.4.12.v20180830.jar
jetty-webapp-9.3.24.v20180605.jar
jetty-xml-9.3.24.v20180605.jar
jline-2.14.6.jar
joda-time-2.9.3.jar
jodd-core-3.5.2.jar
jpam-1.1.jar
json-1.8.jar
json-smart-2.3.jar
json4s-ast_2.12-3.5.3.jar
json4s-core_2.12-3.5.3.jar
@@ -181,6 +186,7 @@ okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
orc-core-1.5.4-nohive.jar
orc-core-1.5.4.jar
orc-mapreduce-1.5.4-nohive.jar
orc-shims-1.5.4.jar
oro-2.0.8.jar
@@ -222,5 +228,5 @@ woodstox-core-5.0.3.jar
xbean-asm7-shaded-4.12.jar
xz-1.5.jar
zjsonpatch-0.3.0.jar
zookeeper-3.4.9.jar
zookeeper-3.4.13.jar
zstd-jni-1.3.2-2.jar
2 changes: 1 addition & 1 deletion dev/lint-java
@@ -20,7 +20,7 @@
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname $SCRIPT_DIR)"

-ERRORS=$($SCRIPT_DIR/../build/mvn -Pkinesis-asl -Pmesos -Pkubernetes -Pyarn -Phive -Phive-thriftserver checkstyle:check | grep ERROR)
+ERRORS=$($SCRIPT_DIR/../build/mvn -Pkinesis-asl -Pmesos -Pkubernetes -Pyarn -Phive -Phive-thriftserver2 checkstyle:check | grep ERROR)

if test ! -z "$ERRORS"; then
echo -e "Checkstyle checks failed at following occurrences:\n$ERRORS"
2 changes: 1 addition & 1 deletion dev/mima
@@ -24,7 +24,7 @@ set -e
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
cd "$FWDIR"

-SPARK_PROFILES="-Pmesos -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
+SPARK_PROFILES="-Pmesos -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver2 -Phive"
TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)"
OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"

2 changes: 2 additions & 0 deletions dev/run-tests-jenkins.py
@@ -173,6 +173,8 @@ def main():
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.6"
if "test-hadoop2.7" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
if "test-hadoop3.1" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.1"

build_display_name = os.environ["BUILD_DISPLAY_NAME"]
build_url = os.environ["BUILD_URL"]
3 changes: 2 additions & 1 deletion dev/run-tests.py
@@ -112,7 +112,7 @@ def determine_modules_to_test(changed_modules):
['graphx', 'examples']
>>> x = [x.name for x in determine_modules_to_test([modules.sql])]
>>> x # doctest: +NORMALIZE_WHITESPACE
-    ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
+    ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver2',
'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
"""
modules_to_test = set()
@@ -274,6 +274,7 @@ def get_hadoop_profiles(hadoop_version):

    sbt_maven_hadoop_profiles = {
        "hadoop2.7": ["-Phadoop-2.7"],
+        "hadoop3.1": ["-Phadoop-3.1"],
    }

    if hadoop_version in sbt_maven_hadoop_profiles:
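
Taken together, the run-tests-jenkins.py and run-tests.py hooks above let a pull request opt into the new profile from its title. A minimal Python sketch of that chain, assuming a hypothetical title string (the trigger token, env var, and profile names come from the two diffs):

import os

# Hypothetical PR title carrying the new trigger token.
ghprb_pull_title = "[SPARK-XXXXX][test-hadoop3.1] Try Hadoop 3.1 on Jenkins"

# run-tests-jenkins.py: title token -> build profile env var.
if "test-hadoop3.1" in ghprb_pull_title:
    os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.1"

# run-tests.py: env var -> sbt/maven profile flags.
sbt_maven_hadoop_profiles = {
    "hadoop2.7": ["-Phadoop-2.7"],
    "hadoop3.1": ["-Phadoop-3.1"],
}
profile = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
print(sbt_maven_hadoop_profiles.get(profile, []))  # ['-Phadoop-3.1']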
2 changes: 1 addition & 1 deletion dev/sbt-checkstyle
@@ -26,7 +26,7 @@ ERRORS=$(echo -e "q\n" \
-Pkubernetes \
-Pyarn \
-Phive \
-  -Phive-thriftserver \
+  -Phive-thriftserver2 \
checkstyle test:checkstyle \
| awk '{if($1~/error/)print}' \
)
2 changes: 1 addition & 1 deletion dev/scalastyle
@@ -26,7 +26,7 @@ ERRORS=$(echo -e "q\n" \
-Pkubernetes \
-Pyarn \
-Phive \
-  -Phive-thriftserver \
+  -Phive-thriftserver2 \
-Pspark-ganglia-lgpl \
-Pdocker-integration-tests \
-Pkubernetes-integration-tests \
11 changes: 5 additions & 6 deletions dev/sparktestsupport/modules.py
@@ -154,19 +154,18 @@ def __hash__(self):
],
)


-hive_thriftserver = Module(
-    name="hive-thriftserver",
+hive_thriftserver2 = Module(
+    name="hive-thriftserver2",
    dependencies=[hive],
    source_file_regexes=[
-        "sql/hive-thriftserver",
+        "sql/hive-thriftserver2",
        "sbin/start-thriftserver.sh",
    ],
    build_profile_flags=[
-        "-Phive-thriftserver",
+        "-Phive-thriftserver2",
    ],
    sbt_test_goals=[
-        "hive-thriftserver/test",
+        "hive-thriftserver2/test",
    ]
)

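
For background, the sparktestsupport tooling selects modules to test by matching changed file paths against each module's source_file_regexes, which is why the regexes above move to sql/hive-thriftserver2. A simplified, hypothetical sketch of that matching (the helper name and prefix-match semantics are assumptions, not the exact dev/run-tests.py code):

import re

# Regexes copied from the hive_thriftserver2 module definition above.
source_file_regexes = ["sql/hive-thriftserver2", "sbin/start-thriftserver.sh"]

def touches_module(changed_file):
    # A changed file selects the module when any regex matches its prefix.
    return any(re.match(regex, changed_file) for regex in source_file_regexes)

print(touches_module("sql/hive-thriftserver2/pom.xml"))     # True
print(touches_module("core/src/main/scala/Example.scala"))  # False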
15 changes: 11 additions & 4 deletions dev/test-dependencies.sh
@@ -29,7 +29,7 @@ export LC_ALL=C
# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution.

# NOTE: These should match those in the release publishing script
-HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pkubernetes -Pyarn -Phive"
+HADOOP2_MODULE_PROFILES="-Pmesos -Pkubernetes -Pyarn -Phive"
MVN="build/mvn"
HADOOP_PROFILES=(
hadoop-2.7
@@ -67,15 +67,22 @@ $MVN -q versions:set -DnewVersion=$TEMP_VERSION -DgenerateBackupPoms=false > /de

# Generate manifests for each Hadoop profile:
for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
+  HIVE_THRIFTSERVER_MODULE=none
+  if [ "${HADOOP_PROFILE}" = 'hadoop-2.7' ]; then
+    HIVE_THRIFTSERVER_MODULE="-Phive-thriftserver"
+  else
+    HIVE_THRIFTSERVER_MODULE="-Phive-thriftserver2"
+  fi
+
  echo "Performing Maven install for $HADOOP_PROFILE"
-  $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar install:install clean -q
+  $MVN $HADOOP2_MODULE_PROFILES $HIVE_THRIFTSERVER_MODULE -P$HADOOP_PROFILE jar:jar jar:test-jar install:install clean -q

  echo "Performing Maven validate for $HADOOP_PROFILE"
-  $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE validate -q
+  $MVN $HADOOP2_MODULE_PROFILES $HIVE_THRIFTSERVER_MODULE -P$HADOOP_PROFILE validate -q

  echo "Generating dependency manifest for $HADOOP_PROFILE"
  mkdir -p dev/pr-deps
-  $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE dependency:build-classpath -pl assembly \
+  $MVN $HADOOP2_MODULE_PROFILES $HIVE_THRIFTSERVER_MODULE -P$HADOOP_PROFILE dependency:build-classpath -pl assembly \
    | grep "Dependencies classpath:" -A 1 \
    | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
    | grep -v spark > dev/pr-deps/spark-deps-$HADOOP_PROFILE
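
In Python terms, the new branch in the loop above reduces to a one-line selection; the sketch below restates it (profile and flag strings are from the diff, and the rationale that only hadoop-2.7 keeps the old thriftserver module is inferred from the branch structure, not stated in the commit):

def thriftserver_flag(hadoop_profile):
    # Mirrors the bash if/else: hadoop-2.7 builds the old hive-thriftserver
    # module; every other profile builds hive-thriftserver2.
    if hadoop_profile == "hadoop-2.7":
        return "-Phive-thriftserver"
    return "-Phive-thriftserver2"

print(thriftserver_flag("hadoop-2.7"))  # -Phive-thriftserver
print(thriftserver_flag("hadoop-3.1"))  # -Phive-thriftserver2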
8 changes: 8 additions & 0 deletions pom.xml
@@ -1523,6 +1523,14 @@
        <groupId>org.apache.calcite</groupId>
        <artifactId>calcite-avatica</artifactId>
      </exclusion>
+      <exclusion>
+        <groupId>org.apache.calcite</groupId>
+        <artifactId>calcite-druid</artifactId>
+      </exclusion>
+      <exclusion>
+        <groupId>org.apache.calcite.avatica</groupId>
+        <artifactId>avatica</artifactId>
+      </exclusion>
      <exclusion>
        <groupId>org.apache.curator</groupId>
        <artifactId>apache-curator</artifactId>
@@ -28,7 +28,6 @@
import org.apache.spark.sql.vectorized.ColumnarMap;
import org.apache.spark.unsafe.types.UTF8String;

-
/**
* A column vector class wrapping Hive's ColumnVector. Because Spark ColumnarBatch only accepts
* Spark's vectorized.ColumnVector, this column vector is used to adapt Hive ColumnVector with
Expand Down
@@ -37,28 +37,34 @@ class OrcDeserializer(

  private val resultRow = new SpecificInternalRow(requiredSchema.map(_.dataType))

+  // `fieldWriters(index)` is
+  // - null if the respective source column is missing, since the output value
+  //   is always null in this case
+  // - a function that updates target column `index` otherwise.
  private val fieldWriters: Array[WritableComparable[_] => Unit] = {
    requiredSchema.zipWithIndex
-      // The value of missing columns are always null, do not need writers.
-      .filterNot { case (_, index) => requestedColIds(index) == -1 }
      .map { case (f, index) =>
-        val writer = newWriter(f.dataType, new RowUpdater(resultRow))
-        (value: WritableComparable[_]) => writer(index, value)
+        if (requestedColIds(index) == -1) {
+          null
+        } else {
+          val writer = newWriter(f.dataType, new RowUpdater(resultRow))
+          (value: WritableComparable[_]) => writer(index, value)
+        }
      }.toArray
  }

-  private val validColIds = requestedColIds.filterNot(_ == -1)
-
  def deserialize(orcStruct: OrcStruct): InternalRow = {
-    var i = 0
-    while (i < validColIds.length) {
-      val value = orcStruct.getFieldValue(validColIds(i))
-      if (value == null) {
-        resultRow.setNullAt(i)
-      } else {
-        fieldWriters(i)(value)
+    var targetColumnIndex = 0
+    while (targetColumnIndex < fieldWriters.length) {
+      if (fieldWriters(targetColumnIndex) != null) {
+        val value = orcStruct.getFieldValue(requestedColIds(targetColumnIndex))
+        if (value == null) {
+          resultRow.setNullAt(targetColumnIndex)
+        } else {
+          fieldWriters(targetColumnIndex)(value)
+        }
      }
-      i += 1
+      targetColumnIndex += 1
    }
    resultRow
  }
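
The rewrite drops the validColIds indirection: fieldWriters is now aligned with the target schema and holds null for source columns missing from the ORC file, so one target index addresses writers, requestedColIds, and resultRow consistently. A minimal Python sketch of that sparse-writer pattern, with illustrative data rather than Spark types:

# requested_col_ids[i] is the source column for target column i; -1 = missing.
requested_col_ids = [0, -1, 2]
source_row = {0: "a", 2: 7}         # field values by source column id
result_row = [None, None, None]     # target row, defaults to nulls

def make_writer(target_index):
    def write(value):
        result_row[target_index] = value
    return write

# None entries mark missing columns, just like the null writers above.
writers = [None if col_id == -1 else make_writer(i)
           for i, col_id in enumerate(requested_col_ids)]

for i, writer in enumerate(writers):
    if writer is not None:          # skip columns absent from the file
        value = source_row.get(requested_col_ids[i])
        if value is None:
            result_row[i] = None    # explicit null stays null
        else:
            writer(value)

print(result_row)  # ['a', None, 7]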
2 changes: 1 addition & 1 deletion sql/hive-thriftserver2/pom.xml
@@ -58,7 +58,7 @@
    <dependency>
      <groupId>${hive.group}</groupId>
      <artifactId>hive-exec</artifactId>
-      <classifier>core</classifier>
+      <classifier>${hive.classifier}</classifier>
    </dependency>
    <dependency>
      <groupId>${hive.group}</groupId>
