Merge pull request apache#4 from pbailis/improving-harness

Adding point cloud generator, additional command line params
jegonzal · Jul 22, 2014 · 7fb92c8 · 7fb92c8
2 parents db14f64 + f97c2b6
commit 7fb92c8
Show file tree

Hide file tree

Showing 4 changed files with 277 additions and 67 deletions.
diff --git a/commands.txt b/commands.txt
@@ -0,0 +1,47 @@
+
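+# Launch the cluster. If the launch is interrupted, run the same command again with the --resume flag added.
+# NOTE(review): --identity-file normally expects the SSH private key; the path below ends in .pub -- confirm it is the right file.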
+ec2/spark-ec2 --slaves=5 --spot-price=1.0 --instance-type=m2.4xlarge --region=us-west-2 --key-pair kaiju --identity-file=/Users/pbailis/.ssh/kaiju_rsa.pub launch testing
+
+export MASTER=ec2-50-112-10-225.us-west-2.compute.amazonaws.com
+
+ssh root@$MASTER
+
+# The commands below are run on MASTER, inside the ssh session opened above.
+
+# check out the improving-harness branch, build the assembly jar, and ship the spark directory to the slaves
+mv spark spark-old
+git clone https://github.com/pbailis/spark.git
+cd spark
+git checkout --track origin/improving-harness
+sbt/sbt assembly
+cp -r ~/spark-old/conf/* conf/
+yum install pssh; pssh -h ~/spark-ec2/slaves rm -rf ~/spark
+cd ~; spark-ec2/copy-dir spark
+
+# to load the data into HDFS
+# first, attach the data volume as /dev/sdp using the web console
+mkdir /mnt/testdata; mount /dev/sdp /mnt/testdata; cd /mnt/testdata
+~/ephemeral-hdfs/bin/hadoop fs -put ./flights flights
+~/ephemeral-hdfs/bin/hadoop fs -put ./weather weather
+~/ephemeral-hdfs/bin/hadoop fs -put ./bismarck_data bismarck_data
+
+# to restart the cluster (stop everything, wait 5 seconds, then start it again)
+cd ~/spark; sbin/stop-all.sh; sleep 5; sbin/start-all.sh 
+
+
+# to run the experiments: flights data first (SVMADMM), then bismarck forest data and synthetic point clouds
+cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm SVMADMM --regType L2 --regParam 1.0 --input hdfs://$MASTER:9000/user/root/flights/2008* --format flights
+
+cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm SVMADMM --regType L2 --regParam 1.0 --format bismarck --input hdfs://$MASTER:9000/user/root/bismarck_data/forest* --numPartitions 40 --sweepIterationStart 1 --sweepIterationEnd 12 --sweepIterationStep 2 | grep RESULT | cut -c 9-
+
+cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm SVM --regType L2 --regParam 1.0 --format cloud  --numPartitions 40 --pointCloudPointsPerPartition 10000 --pointCloudPartitionSkew 0 --pointCloudLabelNoise 0.1 --pointCloudDimension 100 --sweepIterationStart 1 --sweepIterationEnd 12 --sweepIterationStep 2 | grep RESULT | cut -c 9-
+
+cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm LR --regType L2 --regParam 1.0 --format cloud  --numPartitions 40 --pointCloudPointsPerPartition 10000 --pointCloudPartitionSkew 0 --pointCloudLabelNoise 0.1 --pointCloudDimension 100 --sweepIterationStart 1 --sweepIterationEnd 12 --sweepIterationStep 2 | grep RESULT | cut -c 9-
+
+
+
+# to rebuild only the examples project (the experiments), then redeploy and restart the cluster
+cd ~/spark; git pull; sbt/sbt "project examples" "assembly"; cd ~; spark-ec2/copy-dir spark; cd spark; sbin/stop-all.sh; sleep 5; sbin/start-all.sh
+
+# to rebuild everything (full assembly), then redeploy and restart the cluster
+cd ~/spark; git pull; sbt/sbt assembly; cd ~; spark-ec2/copy-dir spark; cd spark; sbin/stop-all.sh; sleep 5; sbin/start-all.sh