diff --git a/dataproc/pom.xml b/dataproc/pom.xml index d6f21df03ba..78a70a3ee8a 100644 --- a/dataproc/pom.xml +++ b/dataproc/pom.xml @@ -34,6 +34,16 @@ + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + Quickstart + + + @@ -65,5 +75,4 @@ test - \ No newline at end of file diff --git a/dataproc/src/main/java/Quickstart.java b/dataproc/src/main/java/Quickstart.java index 0a0d177b1d0..d5ac76259f3 100644 --- a/dataproc/src/main/java/Quickstart.java +++ b/dataproc/src/main/java/Quickstart.java @@ -15,6 +15,19 @@ */ // [START dataproc_quickstart] +/* This quickstart sample walks a user through creating a Cloud Dataproc + * cluster, submitting a PySpark job from Google Cloud Storage to the + * cluster, reading the output of the job and deleting the cluster, all + * using the Java client library. + * + * Usage: + * mvn clean package -DskipTests + * + * mvn exec:java -Dexec.args=" " + * + * You can also set these arguments in the main function instead of providing them via the CLI. + */ + import com.google.api.gax.longrunning.OperationFuture; import com.google.cloud.dataproc.v1.Cluster; import com.google.cloud.dataproc.v1.ClusterConfig; @@ -60,15 +73,6 @@ public static Job waitForJobCompletion( } } - public static void quickstart() throws IOException, InterruptedException { - // TODO(developer): Replace these variables before running the sample. - String projectId = "your-project-id"; - String region = "your-project-region"; - String clusterName = "your-cluster-name"; - String jobFilePath = "your-job-file-path"; - quickstart(projectId, region, clusterName, jobFilePath); - } - public static void quickstart( String projectId, String region, String clusterName, String jobFilePath) throws IOException, InterruptedException { @@ -82,10 +86,10 @@ public static void quickstart( JobControllerSettings jobControllerSettings = JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build(); - // Create both a cluster controller client and job controller client with the configured - // settings. The client only needs to be created once and can be reused for multiple requests. - // Using a try-with-resources closes the client, but this can also be done manually with - // the .close() method. + // Create both a cluster controller client and job controller client with the + // configured settings. The client only needs to be created once and can be reused for + // multiple requests. Using a try-with-resources closes the client, but this can also be done + // manually with the .close() method. try (ClusterControllerClient clusterControllerClient = ClusterControllerClient.create(clusterControllerSettings); JobControllerClient jobControllerClient = @@ -114,7 +118,8 @@ public static void quickstart( OperationFuture createClusterAsyncRequest = clusterControllerClient.createClusterAsync(projectId, region, cluster); Cluster response = createClusterAsyncRequest.get(); - System.out.printf("Cluster created successfully: %s", response.getClusterName()); + System.out.println( + String.format("Cluster created successfully: %s", response.getClusterName())); // Configure the settings for our job. JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build(); @@ -133,7 +138,7 @@ public static void quickstart( int timeout = 10; try { Job jobInfo = finishedJobFuture.get(timeout, TimeUnit.MINUTES); - System.out.printf("Job %s finished successfully.", jobId); + System.out.println(String.format("Job %s finished successfully.", jobId)); // Cloud Dataproc job output gets saved to a GCS bucket allocated to it. Cluster clusterInfo = clusterControllerClient.getCluster(projectId, region, clusterName); @@ -163,5 +168,21 @@ public static void quickstart( System.err.println(String.format("Error executing quickstart: %s ", e.getMessage())); } } + + public static void main(String... args) throws IOException, InterruptedException { + if (args.length != 4) { + System.err.println( + "Insufficient number of parameters provided. Please make sure a " + + "PROJECT_ID, REGION, CLUSTER_NAME and JOB_FILE_PATH are provided, in this order."); + return; + } + + String projectId = args[0]; // project-id of project to create the cluster in + String region = args[1]; // region to create the cluster + String clusterName = args[2]; // name of the cluster + String jobFilePath = args[3]; // location in GCS of the PySpark job + + quickstart(projectId, region, clusterName, jobFilePath); + } } // [END dataproc_quickstart] diff --git a/dataproc/src/test/java/QuickstartTest.java b/dataproc/src/test/java/QuickstartTest.java index 3296e224828..bf31e0325d6 100644 --- a/dataproc/src/test/java/QuickstartTest.java +++ b/dataproc/src/test/java/QuickstartTest.java @@ -88,7 +88,7 @@ public void setUp() { @Test public void quickstartTest() throws IOException, InterruptedException { - Quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH); + Quickstart.main(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH); String output = bout.toString(); assertThat(output, CoreMatchers.containsString("Cluster created successfully"));