Merge pull request apache#107 from mesosphere/SPARK-389-hdfs-tests
[WIP] [SPARK-389] teragen hdfs integration test
mgummelt authored Jan 12, 2017
2 parents 60afd8f + 25d822a commit 1809a87
Showing 2 changed files with 55 additions and 26 deletions.
28 changes: 2 additions & 26 deletions bin/test.sh
@@ -60,35 +60,11 @@ configure_cli() {
    fi
}

install_spark() {
    notify_github pending "Installing Spark"

setup_permissions() {
    if [ "$SECURITY" = "strict" ]; then
        # custom configuration to enable auth stuff:
        ${COMMONS_TOOLS_DIR}/setup_permissions.sh nobody "*" # spark's default service.role
        echo '{ "service": { "user": "nobody", "principal": "service-acct", "secret_name": "secret" } }' > /tmp/spark.json
        dcos --log-level=INFO package install spark --options=/tmp/spark.json --yes
    else
        dcos --log-level=INFO package install spark --yes
    fi

    if [ $? -ne 0 ]; then
        notify_github failure "Spark install failed"
        exit 1
    fi

    SECONDS=0
    while [[ $(dcos marathon app list --json | jq '.[] | select(.id=="/spark") | .tasksHealthy') -ne "1" ]]
    do
        sleep 5
        if [ $SECONDS -gt 600 ]; then # 10 mins
            notify_github failure "Spark install timed out"
            exit 1
        fi
    done

    # sleep 30s due to mesos-dns propagation delays to /service/sparkcli/
    sleep 30
}

run_tests() {
@@ -113,7 +89,7 @@ fetch_commons_tools
start_cluster
# TODO: Migrate the following three commands to dcos-commons-tools/run-tests.py
configure_cli
install_spark
setup_permissions
run_tests

notify_github success "Tests Passed"
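
With this change the Spark package install, and the Marathon health polling that went with it, moves out of bin/test.sh and into the Python test setup in tests/test.py below; only the strict-mode permission grant stays in shell. Purely as an illustration (not part of this commit), an ad-hoc shell equivalent of the readiness wait deleted here, roughly what the new pred() helper below does over HTTP, might look like the following sketch. The use of curl and of `dcos config show core.dcos_url` is an assumption, and clusters with authentication enabled would also need an auth token on the request.

# Sketch only: poll the Spark dispatcher endpoint until it answers,
# with the same 10-minute budget the removed loop used.
DCOS_URL="$(dcos config show core.dcos_url)"
SECONDS=0
until curl -skf -o /dev/null "${DCOS_URL}/service/spark"; do
    sleep 5
    if [ "$SECONDS" -gt 600 ]; then
        echo "Spark dispatcher did not come up within 10 minutes" >&2
        exit 1
    fi
done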
53 changes: 53 additions & 0 deletions tests/test.py
@@ -7,11 +7,64 @@

from boto.s3.connection import S3Connection
from boto.s3.key import Key
import dcos.config
import dcos.http
import dcos.package
import os
import pytest
import re
import shakedown
import subprocess
import urllib


def setup_module(module):
    _require_package('hdfs')
    _install_spark()


def _install_spark():
    options = {"hdfs":
               {"config-url":
                "http://hdfs.marathon.mesos:9000/v1/connection"}}

    if os.environ.get('SECURITY') == 'strict':
        options['service'] = {"user": "nobody",
                              "principal": "service-acct",
                              "secret_name": "secret" }

    shakedown.install_package('spark', options_json=options, wait_for_completion=True)

    def pred():
        dcos_url = dcos.config.get_config_val("core.dcos_url")
        spark_url = urllib.parse.urljoin(dcos_url, "/service/spark")
        status_code = dcos.http.get(spark_url).status_code
        return status_code == 200

    shakedown.spinner.wait_for(pred)


def _require_package(pkg_name):
    pkg_manager = dcos.package.get_package_manager()
    installed_pkgs = dcos.package.installed_packages(pkg_manager, None, None, False)
    if not any(pkg['name'] == pkg_name for pkg in installed_pkgs):
        shakedown.install_package(pkg_name, wait_for_completion=True)
        shakedown.wait_for(_is_hdfs_ready, ignore_exceptions=False, timeout_seconds=600)


DEFAULT_HDFS_TASK_COUNT=8
def _is_hdfs_ready(expected_tasks = DEFAULT_HDFS_TASK_COUNT):
    running_tasks = [t for t in shakedown.get_service_tasks('hdfs') \
                     if t['state'] == 'TASK_RUNNING']
    return len(running_tasks) >= expected_tasks


def test_teragen():
    jar_url = "https://downloads.mesosphere.io/spark/examples/spark-terasort-1.0-jar-with-dependencies_2.11.jar"
    _run_tests(jar_url,
               "1g hdfs:///terasort_in",
               "Number of records written",
               {"--class": "com.github.ehiggs.spark.terasort.TeraGen"})


def test_jar():

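_run_tests is called by test_teragen above, but its body falls outside this hunk. As a rough, hypothetical sketch only, assuming the helper simply shells out to the DC/OS Spark CLI and that its parameters are (jar_url, app_args, expected_output, app_flags), it might look something like this; none of the details below beyond the call-site argument order are taken from the diff.

import subprocess

def _run_tests(jar_url, app_args, expected_output, app_flags=None):
    # Hypothetical sketch, not the committed helper: turn the flag dict into
    # "--class Foo" style arguments and submit the job via the Spark CLI.
    flag_str = " ".join("{} {}".format(k, v) for k, v in (app_flags or {}).items())
    submit_args = "{} {} {}".format(flag_str, jar_url, app_args).strip()
    output = subprocess.check_output(
        ["dcos", "spark", "run", "--submit-args={}".format(submit_args)])
    # The real helper would presumably go on to locate the driver task and
    # assert that its output contains expected_output before passing the test.
    print(output.decode())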