sample: update flaky dataproc sample and test to be more stable (#5665)
* test: update flaky dataproc sample test to be more stable

* Update quickstart.js

* Update quickstart.js

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Update quickstart.js

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Update quickstart.test.js

* Update quickstart.test.js

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
sofisl and gcf-owl-bot[bot] authored Sep 10, 2024
1 parent 426f9f7 commit 768314e
Showing 7 changed files with 144 additions and 312 deletions.
161 changes: 57 additions & 104 deletions packages/google-cloud-dataproc/README.md
````diff
@@ -57,113 +57,66 @@ npm install @google-cloud/dataproc
 ### Using the client library
 
 ```javascript
-// This quickstart sample walks a user through creating a Dataproc
-// cluster, submitting a PySpark job from Google Cloud Storage to the
-// cluster, reading the output of the job and deleting the cluster, all
-// using the Node.js client library.
-
-'use strict';
-
-function main(projectId, region, clusterName, jobFilePath) {
-  const dataproc = require('@google-cloud/dataproc');
-  const {Storage} = require('@google-cloud/storage');
-
-  // Create a cluster client with the endpoint set to the desired cluster region
-  const clusterClient = new dataproc.v1.ClusterControllerClient({
-    apiEndpoint: `${region}-dataproc.googleapis.com`,
-    projectId: projectId,
-  });
-
-  // Create a job client with the endpoint set to the desired cluster region
-  const jobClient = new dataproc.v1.JobControllerClient({
-    apiEndpoint: `${region}-dataproc.googleapis.com`,
-    projectId: projectId,
-  });
-
-  async function quickstart() {
-    // Create the cluster config
-    const cluster = {
-      projectId: projectId,
-      region: region,
-      cluster: {
-        clusterName: clusterName,
-        config: {
-          masterConfig: {
-            numInstances: 1,
-            machineTypeUri: 'n1-standard-2',
-          },
-          workerConfig: {
-            numInstances: 2,
-            machineTypeUri: 'n1-standard-2',
-          },
-        },
-      },
-    };
-
-    // Create the cluster
-    const [operation] = await clusterClient.createCluster(cluster);
-    const [response] = await operation.promise();
-
-    // Output a success message
-    console.log(`Cluster created successfully: ${response.clusterName}`);
-
-    const job = {
-      projectId: projectId,
-      region: region,
-      job: {
-        placement: {
-          clusterName: clusterName,
-        },
-        pysparkJob: {
-          mainPythonFileUri: jobFilePath,
-        },
-      },
-    };
-
-    const [jobOperation] = await jobClient.submitJobAsOperation(job);
-    const [jobResponse] = await jobOperation.promise();
-
-    const matches =
-      jobResponse.driverOutputResourceUri.match('gs://(.*?)/(.*)');
-
-    const storage = new Storage();
-
-    const output = await storage
-      .bucket(matches[1])
-      .file(`${matches[2]}.000000000`)
-      .download();
-
-    // Output a success message.
-    console.log(`Job finished successfully: ${output}`);
-
-    // Delete the cluster once the job has terminated.
-    const deleteClusterReq = {
-      projectId: projectId,
-      region: region,
-      clusterName: clusterName,
-    };
-
-    const [deleteOperation] =
-      await clusterClient.deleteCluster(deleteClusterReq);
-    await deleteOperation.promise();
-
-    // Output a success message
-    console.log(`Cluster ${clusterName} successfully deleted.`);
-  }
-
-  quickstart();
-}
-
-const args = process.argv.slice(2);
-
-if (args.length !== 4) {
-  console.log(
-    'Insufficient number of parameters provided. Please make sure a ' +
-      'PROJECT_ID, REGION, CLUSTER_NAME and JOB_FILE_PATH are provided, in this order.'
-  );
-}
-
-main(...args);
+/**
+ * TODO(developer): Uncomment these variables before running the sample.
+ */
+/**
+ *  Required. The ID of the Google Cloud Platform project that the cluster
+ *  belongs to.
+ */
+// const projectId = 'abc123'
+/**
+ *  Required. The Dataproc region in which to handle the request.
+ */
+// const region = 'us-central1'
+/**
+ *  Optional. A filter constraining the clusters to list. Filters are
+ *  case-sensitive and have the following syntax:
+ *  field = value AND field = value ...
+ *  where **field** is one of `status.state`, `clusterName`, or `labels.KEY`,
+ *  and `[KEY]` is a label key. **value** can be `*` to match all values.
+ *  `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
+ *  `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
+ *  contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
+ *  contains the `DELETING` and `ERROR` states.
+ *  `clusterName` is the name of the cluster provided at creation time.
+ *  Only the logical `AND` operator is supported; space-separated items are
+ *  treated as having an implicit `AND` operator.
+ *  Example filter:
+ *  status.state = ACTIVE AND clusterName = mycluster
+ *  AND labels.env = staging AND labels.starred = *
+ */
+// const filter = 'abc123'
+/**
+ *  Optional. The standard List page size.
+ */
+// const pageSize = 1234
+/**
+ *  Optional. The standard List page token.
+ */
+// const pageToken = 'abc123'
+
+// Imports the Dataproc library
+const {ClusterControllerClient} = require('@google-cloud/dataproc').v1;
+
+// Instantiates a client
+const dataprocClient = new ClusterControllerClient();
+
+async function callListClusters() {
+  // Construct request
+  const request = {
+    projectId,
+    region,
+  };
+
+  // Run request
+  const iterable = dataprocClient.listClustersAsync(request);
+  for await (const response of iterable) {
+    console.log(response);
+  }
+}
+
+callListClusters();
 
 ```
````
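The filter grammar documented in the comment block above can be passed straight to the same `listClustersAsync` call used in the new quickstart. A minimal sketch, not part of this commit; the project ID, label, and filter value are placeholders:

```javascript
// Sketch only: demonstrates the documented `filter` field of the
// ListClusters request. Values below are illustrative, not from the commit.
const {ClusterControllerClient} = require('@google-cloud/dataproc').v1;

async function listStagingClusters(projectId, region) {
  // Regional endpoint, as used by the previous version of the sample.
  const client = new ClusterControllerClient({
    apiEndpoint: `${region}-dataproc.googleapis.com`,
  });

  // Filter string follows the grammar described in the quickstart comments.
  const filter = 'status.state = ACTIVE AND labels.env = staging';

  for await (const cluster of client.listClustersAsync({projectId, region, filter})) {
    console.log(cluster.clusterName);
  }
}

listStagingClusters('my-project', 'us-central1').catch(console.error);
```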

2 changes: 1 addition & 1 deletion packages/google-cloud-dataproc/package.json
```diff
@@ -69,4 +69,4 @@
     "typescript": "^5.1.6"
   },
   "homepage": "https://github.com/googleapis/google-cloud-node/tree/main/packages/google-cloud-dataproc"
-}
+}
```
2 changes: 1 addition & 1 deletion packages/google-cloud-dataproc/samples/README.md
```diff
@@ -830,7 +830,7 @@ View the [source code](https://github.com/googleapis/google-cloud-node/blob/main
 __Usage:__
 
 
-`node quickstart.js <PROJECT_ID> <REGION> <CLUSTER_NAME> <JOB_FILE_PATH>`
+`node packages/google-cloud-dataproc/samples/quickstart.js`
```
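The commit also updates `quickstart.test.js` (listed among the 7 changed files, but its diff is not loaded on this page). A hedged sketch of how a samples test could drive the new usage above; the file layout, environment variable, and assertion are assumptions, not taken from the commit:

```javascript
// Hypothetical sketch: shells out to the sample and checks it exits cleanly.
// Paths, env vars, and assertions are assumed, not copied from the commit.
const {describe, it} = require('mocha');
const assert = require('assert');
const cp = require('child_process');

describe('dataproc quickstart', () => {
  it('should list clusters without error', () => {
    const projectId = process.env.GCLOUD_PROJECT; // assumed env variable
    const region = 'us-central1';
    // Assumes the test runs from the samples directory.
    const output = cp
      .execSync(`node quickstart.js ${projectId} ${region}`)
      .toString();
    // The sample only logs clusters (possibly none), so a clean exit and a
    // string result are the meaningful checks here.
    assert.strictEqual(typeof output, 'string');
  });
});
```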



2 changes: 1 addition & 1 deletion packages/google-cloud-dataproc/samples/package.json
```diff
@@ -22,4 +22,4 @@
     "mocha": "^8.0.0",
     "uuid": "^9.0.0"
   }
-}
+}
```
173 changes: 66 additions & 107 deletions packages/google-cloud-dataproc/samples/quickstart.js
```diff
@@ -1,127 +1,86 @@
-// Copyright 2017 Google LLC
+// Copyright 2024 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
-//      http://www.apache.org/licenses/LICENSE-2.0
+//      https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// sample-metadata:
-//   title: Quickstart
-//   usage: node quickstart.js <PROJECT_ID> <REGION> <CLUSTER_NAME> <JOB_FILE_PATH>
-
-// [START dataproc_quickstart]
-// This quickstart sample walks a user through creating a Dataproc
-// cluster, submitting a PySpark job from Google Cloud Storage to the
-// cluster, reading the output of the job and deleting the cluster, all
-// using the Node.js client library.
-
 'use strict';
 
-function main(projectId, region, clusterName, jobFilePath) {
-  const dataproc = require('@google-cloud/dataproc');
-  const {Storage} = require('@google-cloud/storage');
-
-  // Create a cluster client with the endpoint set to the desired cluster region
-  const clusterClient = new dataproc.v1.ClusterControllerClient({
-    apiEndpoint: `${region}-dataproc.googleapis.com`,
-    projectId: projectId,
-  });
-
-  // Create a job client with the endpoint set to the desired cluster region
-  const jobClient = new dataproc.v1.JobControllerClient({
-    apiEndpoint: `${region}-dataproc.googleapis.com`,
-    projectId: projectId,
-  });
-
-  async function quickstart() {
-    // Create the cluster config
-    const cluster = {
-      projectId: projectId,
-      region: region,
-      cluster: {
-        clusterName: clusterName,
-        config: {
-          masterConfig: {
-            numInstances: 1,
-            machineTypeUri: 'n1-standard-2',
-          },
-          workerConfig: {
-            numInstances: 2,
-            machineTypeUri: 'n1-standard-2',
-          },
-        },
-      },
-    };
-
-    // Create the cluster
-    const [operation] = await clusterClient.createCluster(cluster);
-    const [response] = await operation.promise();
-
-    // Output a success message
-    console.log(`Cluster created successfully: ${response.clusterName}`);
-
-    const job = {
-      projectId: projectId,
-      region: region,
-      job: {
-        placement: {
-          clusterName: clusterName,
-        },
-        pysparkJob: {
-          mainPythonFileUri: jobFilePath,
-        },
-      },
-    };
-
-    const [jobOperation] = await jobClient.submitJobAsOperation(job);
-    const [jobResponse] = await jobOperation.promise();
-
-    const matches =
-      jobResponse.driverOutputResourceUri.match('gs://(.*?)/(.*)');
-
-    const storage = new Storage();
-
-    const output = await storage
-      .bucket(matches[1])
-      .file(`${matches[2]}.000000000`)
-      .download();
-
-    // Output a success message.
-    console.log(`Job finished successfully: ${output}`);
-
-    // Delete the cluster once the job has terminated.
-    const deleteClusterReq = {
-      projectId: projectId,
-      region: region,
-      clusterName: clusterName,
-    };
-
-    const [deleteOperation] =
-      await clusterClient.deleteCluster(deleteClusterReq);
-    await deleteOperation.promise();
-
-    // Output a success message
-    console.log(`Cluster ${clusterName} successfully deleted.`);
-  }
-
-  quickstart();
-}
-
-const args = process.argv.slice(2);
-
-if (args.length !== 4) {
-  console.log(
-    'Insufficient number of parameters provided. Please make sure a ' +
-      'PROJECT_ID, REGION, CLUSTER_NAME and JOB_FILE_PATH are provided, in this order.'
-  );
-}
-
-main(...args);
-// [END dataproc_quickstart]
+function main(projectId, region) {
+  // [START dataproc_v1_generated_quickstart]
+  /**
+   * TODO(developer): Uncomment these variables before running the sample.
+   */
+  /**
+   *  Required. The ID of the Google Cloud Platform project that the cluster
+   *  belongs to.
+   */
+  // const projectId = 'abc123'
+  /**
+   *  Required. The Dataproc region in which to handle the request.
+   */
+  // const region = 'us-central1'
+  /**
+   *  Optional. A filter constraining the clusters to list. Filters are
+   *  case-sensitive and have the following syntax:
+   *  field = value AND field = value ...
+   *  where **field** is one of `status.state`, `clusterName`, or `labels.KEY`,
+   *  and `[KEY]` is a label key. **value** can be `*` to match all values.
+   *  `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
+   *  `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
+   *  contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
+   *  contains the `DELETING` and `ERROR` states.
+   *  `clusterName` is the name of the cluster provided at creation time.
+   *  Only the logical `AND` operator is supported; space-separated items are
+   *  treated as having an implicit `AND` operator.
+   *  Example filter:
+   *  status.state = ACTIVE AND clusterName = mycluster
+   *  AND labels.env = staging AND labels.starred = *
+   */
+  // const filter = 'abc123'
+  /**
+   *  Optional. The standard List page size.
+   */
+  // const pageSize = 1234
+  /**
+   *  Optional. The standard List page token.
+   */
+  // const pageToken = 'abc123'
+
+  // Imports the Dataproc library
+  const {ClusterControllerClient} = require('@google-cloud/dataproc').v1;
+
+  // Instantiates a client
+  const dataprocClient = new ClusterControllerClient();
+
+  async function callListClusters() {
+    // Construct request
+    const request = {
+      projectId,
+      region,
+    };
+
+    // Run request
+    const iterable = dataprocClient.listClustersAsync(request);
+    for await (const response of iterable) {
+      console.log(response);
+    }
+  }
+
+  callListClusters();
+  // [END dataproc_v1_generated_quickstart]
+}
+
+process.on('unhandledRejection', err => {
+  console.error(err.message);
+  process.exitCode = 1;
+});
+main(...process.argv.slice(2));
```
