diff --git a/.github/scripts/install-hdfs.sh b/.github/scripts/install-hdfs.sh
index 77d8803e..6124d0d9 100755
--- a/.github/scripts/install-hdfs.sh
+++ b/.github/scripts/install-hdfs.sh
@@ -2,7 +2,7 @@
 set -e
 
-KERBEROS=${KERBEROS-"false"}
+KERBEROS="${KERBEROS-false}"
 AES=${AES-"false"}
 
 if [ "$DATA_TRANSFER_PROTECTION" = "privacy" ]; then
     KERBEROS="true"
@@ -15,11 +15,17 @@ else
     ENCRYPT_DATA_TRANSFER="false"
 fi
 
+CONF_KMS_PROVIDER=""
+TRANSPARENT_ENCRYPTION="${TRANSPARENT_ENCRYPTION-false}"
+if [ "$TRANSPARENT_ENCRYPTION" = "true" ]; then
+    CONF_KMS_PROVIDER="kms://http@localhost:9600/kms"
+fi
+
 CONF_AUTHENTICATION="simple"
 KERBEROS_REALM="EXAMPLE.COM"
 KERBEROS_PRINCIPLE="administrator"
 KERBEROS_PASSWORD="password1234"
-if [ $KERBEROS = "true" ]; then
+if [ "$KERBEROS" = "true" ]; then
     CONF_AUTHENTICATION="kerberos"
 
     HOSTNAME=$(hostname)
@@ -50,7 +56,7 @@ EOF
 
     sudo apt-get install -y krb5-user krb5-kdc krb5-admin-server
    printf "$KERBEROS_PASSWORD\n$KERBEROS_PASSWORD" | sudo kdb5_util -r "$KERBEROS_REALM" create -s
-    for p in nn dn $USER gohdfs1 gohdfs2; do
+    for p in nn dn kms $USER gohdfs1 gohdfs2; do
         sudo kadmin.local -q "addprinc -randkey $p/$HOSTNAME@$KERBEROS_REALM"
         sudo kadmin.local -q "addprinc -randkey $p/localhost@$KERBEROS_REALM"
         sudo kadmin.local -q "xst -k /tmp/$p.keytab $p/$HOSTNAME@$KERBEROS_REALM"
@@ -116,6 +122,10 @@ sudo tee $HADOOP_ROOT/etc/hadoop/core-site.xml <<EOF
     <name>hadoop.rpc.protection</name>
     <value>$RPC_PROTECTION</value>
   </property>
+  <property>
+    <name>hadoop.security.key.provider.path</name>
+    <value>$CONF_KMS_PROVIDER</value>
+  </property>
 </configuration>
 EOF
 
@@ -172,6 +182,41 @@ $HADOOP_ROOT/bin/hdfs namenode -format
 sudo groupadd hadoop
 sudo usermod -a -G hadoop $USER
 
+sudo tee $HADOOP_ROOT/etc/hadoop/kms-site.xml <<EOF
+<configuration>
+  <property>
+    <name>hadoop.kms.key.provider.uri</name>
+    <value>jceks://file@/tmp/hdfs/kms.keystore</value>
+  </property>
+  <property>
+    <name>hadoop.security.keystore.java-keystore-provider.password-file</name>
+    <value>kms.keystore.password</value>
+  </property>
+  <property>
+    <name>hadoop.kms.authentication.type</name>
+    <value>$CONF_AUTHENTICATION</value>
+  </property>
+  <property>
+    <name>hadoop.kms.authentication.kerberos.keytab</name>
+    <value>/tmp/kms.keytab</value>
+  </property>
+  <property>
+    <name>hadoop.kms.authentication.kerberos.principal</name>
+    <value>kms/localhost@$KERBEROS_REALM</value>
+  </property>
+</configuration>
+EOF
+
+sudo tee $HADOOP_ROOT/etc/hadoop/kms.keystore.password <<EOF
+password1234
+EOF
+
+if [ "$TRANSPARENT_ENCRYPTION" = "true" ]; then
+    echo "Starting kms..."
+    $HADOOP_ROOT/bin/hadoop kms > /tmp/hdfs/kms.log 2>&1 &
+fi
+
 echo "Starting namenode..."
 $HADOOP_ROOT/bin/hdfs namenode > /tmp/hdfs/namenode.log 2>&1 &
 
@@ -183,5 +228,12 @@ sleep 5
 echo "Waiting for cluster to exit safe mode..."
 $HADOOP_ROOT/bin/hdfs dfsadmin -safemode wait
 
+$HADOOP_ROOT/bin/hadoop fs -mkdir -p /_test/kms
+if [ "$TRANSPARENT_ENCRYPTION" = "true" ]; then
+    echo "Prepare encrypted zone"
+    $HADOOP_ROOT/bin/hadoop key create key1
+    $HADOOP_ROOT/bin/hdfs crypto -createZone -keyName key1 -path /_test/kms
+fi
+
 echo "HADOOP_CONF_DIR=$(pwd)/$HADOOP_ROOT/etc/hadoop" >> $GITHUB_ENV
-echo "$(pwd)/$HADOOP_ROOT/bin" >> $GITHUB_PATH
\ No newline at end of file
+echo "$(pwd)/$HADOOP_ROOT/bin" >> $GITHUB_PATH
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7ff0ed63..c779afef 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -14,6 +14,8 @@ jobs:
         include:
           - hadoop_version: 2.10.1
           - hadoop_version: 3.3.1
+          - hadoop_version: 3.3.1
+            transparent_encryption: true
           - hadoop_version: 3.3.1
             kerberos: true
             rpc_protection: authentication
@@ -48,6 +50,7 @@
           RPC_PROTECTION: ${{ matrix.rpc_protection }}
           TRANSFER_PROTECTION: ${{ matrix.transfer_protection }}
           AES: ${{ matrix.aes }}
+          TRANSPARENT_ENCRYPTION: ${{ matrix.transparent_encryption }}
 
       # Similarly, this step adds the bats binary to GITHUB_PATH.
       - name: install-bats.sh
@@ -62,9 +65,21 @@ jobs:
         run: find -name '*.pb.go' -exec touch {} \; && make
 
       - name: make test
+        env:
+          HADOOP_VERSION: ${{ matrix.hadoop_version }}
         run: |
           make test
 
+      - name: cat kms.log
+        if: always()
+        run: |
+          if [ -f /tmp/hdfs/kms.log ]
+          then
+            cat /tmp/hdfs/kms.log
+          else
+            echo "not exists"
+          fi
+
       - name: cat namenode.log
         if: always()
         run: cat /tmp/hdfs/namenode.log
diff --git a/cmd/hdfs/test/kms.bats b/cmd/hdfs/test/kms.bats
new file mode 100644
index 00000000..115e30bb
--- /dev/null
+++ b/cmd/hdfs/test/kms.bats
@@ -0,0 +1,33 @@
+#!/usr/bin/env bats
+
+load helper
+
+@test "kms: put java to go" {
+  run $HADOOP_FS -put $ROOT_TEST_DIR/testdata/foo.txt /_test/kms/foo1
+  assert_success
+
+  run $HDFS cat /_test/kms/foo1
+  assert_output "bar"
+}
+
+@test "kms: put go to java" {
+  if [ "$HADOOP_VERSION" != "2.10.1" ]; then
+    run $HDFS put $ROOT_TEST_DIR/testdata/foo.txt /_test/kms/foo2
+    assert_success
+    run $HADOOP_FS -cat /_test/kms/foo2
+    assert_output "bar"
+  else
+    skip "workaround for hadoop error: illegal reflective access operation has occurred"
+  fi
+}
+
+@test "kms: tail" {
+  run $HDFS put $ROOT_TEST_DIR/testdata/mobydick.txt /_test/kms/
+  assert_success
+
+  run bash -c "$HDFS tail /_test/kms/mobydick.txt > $BATS_TMPDIR/mobydick_test.txt"
+  assert_success
+
+  SHA=`tail $ROOT_TEST_DIR/testdata/mobydick.txt | shasum | awk '{ print $1 }'`
+  assert_equal $SHA `shasum < $BATS_TMPDIR/mobydick_test.txt | awk '{ print $1 }'`
+}