diff --git a/dataproc/pom.xml b/dataproc/pom.xml
index d6f21df03ba..78a70a3ee8a 100644
--- a/dataproc/pom.xml
+++ b/dataproc/pom.xml
@@ -34,6 +34,16 @@
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ 1.6.0
+
+ Quickstart
+
+
+
@@ -65,5 +75,4 @@
test
-
\ No newline at end of file
diff --git a/dataproc/src/main/java/Quickstart.java b/dataproc/src/main/java/Quickstart.java
index 0a0d177b1d0..d5ac76259f3 100644
--- a/dataproc/src/main/java/Quickstart.java
+++ b/dataproc/src/main/java/Quickstart.java
@@ -15,6 +15,19 @@
*/
// [START dataproc_quickstart]
+/* This quickstart sample walks a user through creating a Cloud Dataproc
+ * cluster, submitting a PySpark job from Google Cloud Storage to the
+ * cluster, reading the output of the job and deleting the cluster, all
+ * using the Java client library.
+ *
+ * Usage:
+ * mvn clean package -DskipTests
+ *
+ * mvn exec:java -Dexec.args=" "
+ *
+ * You can also set these arguments in the main function instead of providing them via the CLI.
+ */
+
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.dataproc.v1.Cluster;
import com.google.cloud.dataproc.v1.ClusterConfig;
@@ -60,15 +73,6 @@ public static Job waitForJobCompletion(
}
}
- public static void quickstart() throws IOException, InterruptedException {
- // TODO(developer): Replace these variables before running the sample.
- String projectId = "your-project-id";
- String region = "your-project-region";
- String clusterName = "your-cluster-name";
- String jobFilePath = "your-job-file-path";
- quickstart(projectId, region, clusterName, jobFilePath);
- }
-
public static void quickstart(
String projectId, String region, String clusterName, String jobFilePath)
throws IOException, InterruptedException {
@@ -82,10 +86,10 @@ public static void quickstart(
JobControllerSettings jobControllerSettings =
JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build();
- // Create both a cluster controller client and job controller client with the configured
- // settings. The client only needs to be created once and can be reused for multiple requests.
- // Using a try-with-resources closes the client, but this can also be done manually with
- // the .close() method.
+ // Create both a cluster controller client and job controller client with the
+ // configured settings. The client only needs to be created once and can be reused for
+ // multiple requests. Using a try-with-resources closes the client, but this can also be done
+ // manually with the .close() method.
try (ClusterControllerClient clusterControllerClient =
ClusterControllerClient.create(clusterControllerSettings);
JobControllerClient jobControllerClient =
@@ -114,7 +118,8 @@ public static void quickstart(
OperationFuture createClusterAsyncRequest =
clusterControllerClient.createClusterAsync(projectId, region, cluster);
Cluster response = createClusterAsyncRequest.get();
- System.out.printf("Cluster created successfully: %s", response.getClusterName());
+ System.out.println(
+ String.format("Cluster created successfully: %s", response.getClusterName()));
// Configure the settings for our job.
JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build();
@@ -133,7 +138,7 @@ public static void quickstart(
int timeout = 10;
try {
Job jobInfo = finishedJobFuture.get(timeout, TimeUnit.MINUTES);
- System.out.printf("Job %s finished successfully.", jobId);
+ System.out.println(String.format("Job %s finished successfully.", jobId));
// Cloud Dataproc job output gets saved to a GCS bucket allocated to it.
Cluster clusterInfo = clusterControllerClient.getCluster(projectId, region, clusterName);
@@ -163,5 +168,21 @@ public static void quickstart(
System.err.println(String.format("Error executing quickstart: %s ", e.getMessage()));
}
}
+
+ public static void main(String... args) throws IOException, InterruptedException {
+ if (args.length != 4) {
+ System.err.println(
+ "Insufficient number of parameters provided. Please make sure a "
+ + "PROJECT_ID, REGION, CLUSTER_NAME and JOB_FILE_PATH are provided, in this order.");
+ return;
+ }
+
+ String projectId = args[0]; // project-id of project to create the cluster in
+ String region = args[1]; // region to create the cluster
+ String clusterName = args[2]; // name of the cluster
+ String jobFilePath = args[3]; // location in GCS of the PySpark job
+
+ quickstart(projectId, region, clusterName, jobFilePath);
+ }
}
// [END dataproc_quickstart]
diff --git a/dataproc/src/test/java/QuickstartTest.java b/dataproc/src/test/java/QuickstartTest.java
index 3296e224828..bf31e0325d6 100644
--- a/dataproc/src/test/java/QuickstartTest.java
+++ b/dataproc/src/test/java/QuickstartTest.java
@@ -88,7 +88,7 @@ public void setUp() {
@Test
public void quickstartTest() throws IOException, InterruptedException {
- Quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH);
+ Quickstart.main(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH);
String output = bout.toString();
assertThat(output, CoreMatchers.containsString("Cluster created successfully"));