spark 3.0.0 - hadoop 2.6.0 build apache#1
cjun5 committed Jun 30, 2022
1 parent 3fdfce3 commit 871701f
Showing 13 changed files with 1,067 additions and 819 deletions.
@@ -80,69 +80,69 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite {
    assert(manager.isProviderLoaded("hbase"))
  }

  test("SPARK-29082: do not fail if current user does not have credentials") {
    // SparkHadoopUtil overrides the UGI configuration during initialization. That normally
    // happens early in the Spark application, but here it may affect the test depending on
    // how it's run, so force its initialization.
    SparkHadoopUtil.get

    var kdc: MiniKdc = null
    try {
      // UserGroupInformation.setConfiguration needs a default Kerberos realm, which can be
      // set in krb5.conf. MiniKdc sets "java.security.krb5.conf" in start() and removes it
      // when stop() is called.
      val kdcDir = Utils.createTempDir()
      val kdcConf = MiniKdc.createConf()
      // The port for the MiniKdc service is selected in the constructor, but it is only
      // bound later, in MiniKdc.start() -> MiniKdc.initKDCServer() -> KdcServer.start().
      // In the meantime, some other service may grab the port and cause a BindException,
      // which makes tests that run in dedicated JVMs and rely on MiniKdc flaky.
      //
      // https://issues.apache.org/jira/browse/HADOOP-12656 is fixed in Hadoop 2.8.0.
      //
      // The workaround here is to retry the whole process within a timeout, since we use
      // Hadoop 2.7.4 as the default.
      // https://issues.apache.org/jira/browse/SPARK-31631
      eventually(timeout(60.seconds), interval(1.second)) {
        try {
          kdc = new MiniKdc(kdcConf, kdcDir)
          kdc.start()
        } catch {
          case NonFatal(e) =>
            if (kdc != null) {
              kdc.stop()
              kdc = null
            }
            throw e
        }
      }

      val krbConf = new Configuration()
      krbConf.set(HADOOP_SECURITY_AUTHENTICATION, "kerberos")

      UserGroupInformation.setConfiguration(krbConf)
      val manager = new HadoopDelegationTokenManager(new SparkConf(false), krbConf, null)
      val testImpl = new PrivilegedExceptionAction[Unit] {
        override def run(): Unit = {
          assert(UserGroupInformation.isSecurityEnabled())
          val creds = new Credentials()
          manager.obtainDelegationTokens(creds)
          assert(creds.numberOfTokens() === 0)
          assert(creds.numberOfSecretKeys() === 0)
        }
      }

      val realUser = UserGroupInformation.createUserForTesting("realUser", Array.empty)
      realUser.doAs(testImpl)

      val proxyUser = UserGroupInformation.createProxyUserForTesting("proxyUser", realUser,
        Array.empty)
      proxyUser.doAs(testImpl)
    } finally {
      if (kdc != null) {
        kdc.stop()
      }
      UserGroupInformation.reset()
    }
  }
// test("SPARK-29082: do not fail if current user does not have credentials") {
// // SparkHadoopUtil overrides the UGI configuration during initialization. That normally
// // happens early in the Spark application, but here it may affect the test depending on
// // how it's run, so force its initialization.
// SparkHadoopUtil.get
//
// var kdc: MiniKdc = null
// try {
// // UserGroupInformation.setConfiguration needs default kerberos realm which can be set in
// // krb5.conf. MiniKdc sets "java.security.krb5.conf" in start and removes it when stop called.
// val kdcDir = Utils.createTempDir()
// val kdcConf = MiniKdc.createConf()
// // The port for MiniKdc service gets selected in the constructor, but will be bound
// // to it later in MiniKdc.start() -> MiniKdc.initKDCServer() -> KdcServer.start().
// // In meantime, when some other service might capture the port during this progress, and
// // cause BindException.
// // This makes our tests which have dedicated JVMs and rely on MiniKDC being flaky
// //
// // https://issues.apache.org/jira/browse/HADOOP-12656 get fixed in Hadoop 2.8.0.
// //
// // The workaround here is to periodically repeat this process with a timeout , since we are
// // using Hadoop 2.7.4 as default.
// // https://issues.apache.org/jira/browse/SPARK-31631
// eventually(timeout(60.seconds), interval(1.second)) {
// try {
// kdc = new MiniKdc(kdcConf, kdcDir)
// kdc.start()
// } catch {
// case NonFatal(e) =>
// if (kdc != null) {
// kdc.stop()
// kdc = null
// }
// throw e
// }
// }
//
// val krbConf = new Configuration()
// krbConf.set(HADOOP_SECURITY_AUTHENTICATION, "kerberos")
//
// UserGroupInformation.setConfiguration(krbConf)
// val manager = new HadoopDelegationTokenManager(new SparkConf(false), krbConf, null)
// val testImpl = new PrivilegedExceptionAction[Unit] {
// override def run(): Unit = {
// assert(UserGroupInformation.isSecurityEnabled())
// val creds = new Credentials()
// manager.obtainDelegationTokens(creds)
// assert(creds.numberOfTokens() === 0)
// assert(creds.numberOfSecretKeys() === 0)
// }
// }
//
// val realUser = UserGroupInformation.createUserForTesting("realUser", Array.empty)
// realUser.doAs(testImpl)
//
// val proxyUser = UserGroupInformation.createProxyUserForTesting("proxyUser", realUser,
// Array.empty)
// proxyUser.doAs(testImpl)
// } finally {
// if (kdc != null) {
// kdc.stop()
// }
// UserGroupInformation.reset()
// }
// }
}
2 changes: 1 addition & 1 deletion dev/appveyor-install-dependencies.ps1
@@ -96,7 +96,7 @@ Pop-Location

# ========================== Hadoop bin package
# This must match the version at https://github.com/steveloughran/winutils/tree/master/hadoop-2.7.1
$hadoopVer = "2.7.1"
$hadoopVer = "2.6.4"
$hadoopPath = "$tools\hadoop"
if (!(Test-Path $hadoopPath)) {
    New-Item -ItemType Directory -Force -Path $hadoopPath | Out-Null
23 changes: 20 additions & 3 deletions dev/create-release/release-build.sh
@@ -235,7 +235,16 @@ if [[ "$1" == "package" ]]; then
    -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log
  cd ..

  if [[ -n $R_FLAG ]]; then
  if [ -z "$BUILD_PACKAGE" ]; then
    echo "Creating distribution without PIP/R package"
    ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \
      -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log
    cd ..
  elif [[ "$BUILD_PACKAGE" == "withr" ]]; then
    echo "Creating distribution with R package"
    ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz --r $FLAGS \
      -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log
    cd ..
    echo "Copying and signing R source package"
    R_DIST_NAME=SparkR_$SPARK_VERSION.tar.gz
    cp spark-$SPARK_VERSION-bin-$NAME/R/$R_DIST_NAME .
@@ -246,7 +255,11 @@ if [[ "$1" == "package" ]]; then
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
      SHA512 $R_DIST_NAME > \
      $R_DIST_NAME.sha512
  fi
  else
    echo "Creating distribution with PIP package"
    ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz --pip $FLAGS \
      -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log
    cd ..

    if [[ -n $PIP_FLAG ]]; then
      echo "Copying and signing python distribution"
@@ -281,6 +294,7 @@ if [[ "$1" == "package" ]]; then
  declare -A BINARY_PKGS_ARGS
  BINARY_PKGS_ARGS["hadoop2.7"]="-Phadoop-2.7 $HIVE_PROFILES"
  if ! is_dry_run; then
    BINARY_PKGS_ARGS["hadoop2.6"]="-Phadoop-2.6 $HIVE_PROFILES"
    BINARY_PKGS_ARGS["without-hadoop"]="-Phadoop-provided"
    if [[ $SPARK_VERSION < "3.0." ]]; then
      BINARY_PKGS_ARGS["hadoop2.6"]="-Phadoop-2.6 $HIVE_PROFILES"
@@ -291,7 +305,10 @@ if [[ "$1" == "package" ]]; then
  fi

  declare -A BINARY_PKGS_EXTRA
  BINARY_PKGS_EXTRA["hadoop2.7"]="withpip,withr"
  BINARY_PKGS_EXTRA["hadoop2.7"]="withpip"
  if ! is_dry_run; then
    BINARY_PKGS_EXTRA["hadoop2.6"]="withr"
  fi

  if [[ $PUBLISH_SCALA_2_11 = 1 ]]; then
    key="without-hadoop-scala-2.11"
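For context, a minimal self-contained sketch of how an associative-array build matrix like BINARY_PKGS_ARGS/BINARY_PKGS_EXTRA can drive the packaging step. The loop and echo below are illustrative only, not the actual consumption code in release-build.sh; the profile flags and "extra" values are copied from the hunks above, with $HIVE_PROFILES expanded to its usual -Phive -Phive-thriftserver form as an assumption:

#!/usr/bin/env bash
# Build matrix: key = binary package name, value = Maven profile flags.
declare -A BINARY_PKGS_ARGS BINARY_PKGS_EXTRA
BINARY_PKGS_ARGS["hadoop2.6"]="-Phadoop-2.6 -Phive -Phive-thriftserver"
BINARY_PKGS_ARGS["hadoop2.7"]="-Phadoop-2.7 -Phive -Phive-thriftserver"
# Exactly one profile carries the R package and one carries the PIP package,
# mirroring the hadoop2.6 -> withr / hadoop2.7 -> withpip split in this commit.
BINARY_PKGS_EXTRA["hadoop2.6"]="withr"
BINARY_PKGS_EXTRA["hadoop2.7"]="withpip"

for key in "${!BINARY_PKGS_ARGS[@]}"; do
  args=${BINARY_PKGS_ARGS[$key]}
  extra=${BINARY_PKGS_EXTRA[$key]:-}  # empty when no PIP/R package is requested
  echo "would build: name=$key flags='$args' extra='$extra'"
done

This keeps one binary distribution per Hadoop profile while the BUILD_PACKAGE branch added above decides whether a given build also produces the PIP or R artifact.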
198 changes: 198 additions & 0 deletions dev/deps/spark-deps-hadoop-2.6-hive-1.2
@@ -0,0 +1,198 @@
JavaEWAH-0.3.2.jar
RoaringBitmap-0.5.11.jar
ST4-4.0.4.jar
activation-1.1.1.jar
aircompressor-0.10.jar
antlr-2.7.7.jar
antlr-runtime-3.4.jar
antlr4-runtime-4.7.jar
aopalliance-1.0.jar
aopalliance-repackaged-2.4.0-b34.jar
apache-log4j-extras-1.2.17.jar
apacheds-i18n-2.0.0-M15.jar
apacheds-kerberos-codec-2.0.0-M15.jar
api-asn1-api-1.0.0-M20.jar
api-util-1.0.0-M20.jar
arpack_combined_all-0.1.jar
arrow-format-0.10.0.jar
arrow-memory-0.10.0.jar
arrow-vector-0.10.0.jar
automaton-1.11-8.jar
avro-1.8.2.jar
avro-ipc-1.8.2.jar
avro-mapred-1.8.2-hadoop2.jar
bonecp-0.8.0.RELEASE.jar
breeze-macros_2.11-0.13.2.jar
breeze_2.11-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.9.3.jar
chill_2.11-0.9.3.jar
commons-beanutils-1.7.0.jar
commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
commons-codec-1.10.jar
commons-collections-3.2.2.jar
commons-compiler-3.0.10.jar
commons-compress-1.8.1.jar
commons-configuration-1.6.jar
commons-crypto-1.0.0.jar
commons-dbcp-1.4.jar
commons-digester-1.8.jar
commons-httpclient-3.1.jar
commons-io-2.4.jar
commons-lang-2.6.jar
commons-lang3-3.5.jar
commons-logging-1.1.3.jar
commons-math3-3.4.1.jar
commons-net-3.1.jar
commons-pool-1.5.4.jar
compress-lzf-1.0.3.jar
core-1.1.2.jar
curator-client-2.6.0.jar
curator-framework-2.6.0.jar
curator-recipes-2.6.0.jar
datanucleus-api-jdo-3.2.6.jar
datanucleus-core-3.2.10.jar
datanucleus-rdbms-3.2.9.jar
derby-10.12.1.1.jar
eigenbase-properties-1.1.5.jar
flatbuffers-1.2.0-3f79e055.jar
generex-1.0.1.jar
gson-2.2.4.jar
guava-14.0.1.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.6.5.jar
hadoop-auth-2.6.5.jar
hadoop-client-2.6.5.jar
hadoop-common-2.6.5.jar
hadoop-hdfs-2.6.5.jar
hadoop-mapreduce-client-app-2.6.5.jar
hadoop-mapreduce-client-common-2.6.5.jar
hadoop-mapreduce-client-core-2.6.5.jar
hadoop-mapreduce-client-jobclient-2.6.5.jar
hadoop-mapreduce-client-shuffle-2.6.5.jar
hadoop-yarn-api-2.6.5.jar
hadoop-yarn-client-2.6.5.jar
hadoop-yarn-common-2.6.5.jar
hadoop-yarn-server-common-2.6.5.jar
hadoop-yarn-server-web-proxy-2.6.5.jar
hk2-api-2.4.0-b34.jar
hk2-locator-2.4.0-b34.jar
hk2-utils-2.4.0-b34.jar
hppc-0.7.2.jar
htrace-core-3.0.4.jar
httpclient-4.5.6.jar
httpcore-4.4.10.jar
ivy-2.4.0.jar
jackson-annotations-2.9.6.jar
jackson-core-2.9.6.jar
jackson-core-asl-1.9.13.jar
jackson-databind-2.9.6.jar
jackson-dataformat-yaml-2.9.6.jar
jackson-jaxrs-1.9.13.jar
jackson-mapper-asl-1.9.13.jar
jackson-module-jaxb-annotations-2.9.6.jar
jackson-module-paranamer-2.9.6.jar
jackson-module-scala_2.11-2.9.6.jar
jackson-xc-1.9.13.jar
janino-3.0.10.jar
javassist-3.18.1-GA.jar
javax.annotation-api-1.2.jar
javax.inject-1.jar
javax.inject-2.4.0-b34.jar
javax.servlet-api-3.1.0.jar
javax.ws.rs-api-2.0.1.jar
javolution-5.5.1.jar
jaxb-api-2.2.2.jar
jcl-over-slf4j-1.7.16.jar
jdo-api-3.0.1.jar
jersey-client-2.22.2.jar
jersey-common-2.22.2.jar
jersey-container-servlet-2.22.2.jar
jersey-container-servlet-core-2.22.2.jar
jersey-guava-2.22.2.jar
jersey-media-jaxb-2.22.2.jar
jersey-server-2.22.2.jar
jetty-6.1.26.jar
jetty-util-6.1.26.jar
jline-2.14.6.jar
joda-time-2.9.3.jar
jodd-core-3.5.2.jar
jpam-1.1.jar
json4s-ast_2.11-3.5.3.jar
json4s-core_2.11-3.5.3.jar
json4s-jackson_2.11-3.5.3.jar
json4s-scalap_2.11-3.5.3.jar
jsr305-1.3.9.jar
jta-1.1.jar
jtransforms-2.4.0.jar
jul-to-slf4j-1.7.16.jar
kryo-shaded-4.0.2.jar
kubernetes-client-3.0.0.jar
kubernetes-model-2.0.0.jar
leveldbjni-all-1.8.jar
libfb303-0.9.3.jar
libthrift-0.9.3.jar
log4j-1.2.17.jar
logging-interceptor-3.8.1.jar
lz4-java-1.5.0.jar
machinist_2.11-0.6.1.jar
macro-compat_2.11-1.1.1.jar
mesos-1.4.0-shaded-protobuf.jar
metrics-core-3.1.5.jar
metrics-graphite-3.1.5.jar
metrics-json-3.1.5.jar
metrics-jvm-3.1.5.jar
minlog-1.3.0.jar
netty-3.9.9.Final.jar
netty-all-4.1.17.Final.jar
objenesis-2.5.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
orc-core-1.5.3-nohive.jar
orc-mapreduce-1.5.3-nohive.jar
orc-shims-1.5.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
parquet-column-1.10.0.jar
parquet-common-1.10.0.jar
parquet-encoding-1.10.0.jar
parquet-format-2.4.0.jar
parquet-hadoop-1.10.0.jar
parquet-hadoop-bundle-1.6.0.jar
parquet-jackson-1.10.0.jar
protobuf-java-2.5.0.jar
py4j-0.10.7.jar
pyrolite-4.13.jar
scala-compiler-2.11.12.jar
scala-library-2.11.12.jar
scala-parser-combinators_2.11-1.1.0.jar
scala-reflect-2.11.12.jar
scala-xml_2.11-1.0.5.jar
shapeless_2.11-2.3.2.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snakeyaml-1.18.jar
snappy-0.2.jar
snappy-java-1.1.7.1.jar
spire-macros_2.11-0.13.0.jar
spire_2.11-0.13.0.jar
stax-api-1.0-2.jar
stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-2.7.3.jar
validation-api-1.1.0.Final.jar
xbean-asm6-shaded-4.8.jar
xercesImpl-2.9.1.jar
xmlenc-0.52.jar
xz-1.5.jar
zjsonpatch-0.3.0.jar
zookeeper-3.4.6.jar
zstd-jni-1.3.2-2.jar
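A note on this manifest: files under dev/deps pin the expected runtime classpath for a given profile combination (here hadoop-2.6 with hive-1.2). As a hedged aside based on the standard Spark developer workflow rather than anything in this commit, dev/test-dependencies.sh diffs the resolved classpath against these manifests and can regenerate them; verify the flag against your checkout:

# Fail if the resolved classpath and the dev/deps/* manifests diverge.
./dev/test-dependencies.sh
# Rewrite the manifests after a profile change (flag name as in upstream Spark).
./dev/test-dependencies.sh --replace-manifest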