Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: tpu_queued_resources_startup_script/create_network/time_bound #3907

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Prev Previous commit
Next Next commit
feat: tpu_queued_resources_create_network
  • Loading branch information
Joanna Grycz committed Dec 3, 2024
commit 50cc2e8e1b2859dbadce2a9e79fec803a88b62d8
127 changes: 127 additions & 0 deletions tpu/queuedResources/createQueuedResourceNetwork.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

async function main(
nodeName,
queuedResourceName,
zone,
tpuType,
tpuSoftwareVersion
) {
// [START tpu_queued_resources_create_network]
// Import the TPU library
const {TpuClient} = require('@google-cloud/tpu').v2alpha1;
const {Node, NetworkConfig, QueuedResource} =
require('@google-cloud/tpu').protos.google.cloud.tpu.v2alpha1;

// Instantiate a tpuClient
const tpuClient = new TpuClient();

/**
* TODO(developer): Update/uncomment these variables before running the sample.
*/
// Project ID or project number of the Google Cloud project in which you want to create the queued resource.
const projectId = await tpuClient.getProjectId();

// The name of the network you want the node to connect to. The network should be assigned to your project.
const networkName = 'compute-tpu-network';

// The region of the subnetwork that you want the node to connect to.
const region = 'europe-west4';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as in another PR, this value can be just taken from the zone value. No need to explicitly define.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


// The name for your queued resource.
// queuedResourceName = 'queued-resource-1';

// The name for your node.
// nodeName = 'node-name-1';

// The zone in which to create the node.
// For more information about supported TPU types for specific zones,
// see https://cloud.google.com/tpu/docs/regions-zones
// zone = 'europe-west4-a';

// The accelerator type that specifies the version and size of the node you want to create.
// For more information about supported accelerator types for each TPU version,
// see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
// tpuType = 'v2-8';

// Software version that specifies the version of the node runtime to install. For more information,
// see https://cloud.google.com/tpu/docs/runtimes
// tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';

/**
 * Requests a queued resource containing a single TPU node that is attached
 * to the configured network and subnetwork, waits for the create operation
 * to finish, and prints the resulting resource.
 */
async function callCreateQueuedResourceNetwork() {
  // Resource paths shared by the node, the node spec and the request.
  const projectPath = `projects/${projectId}`;
  const locationPath = `${projectPath}/locations/${zone}`;

  // Network and subnetwork the TPU node should be connected to.
  const tpuNetwork = new NetworkConfig({
    enableExternalIps: true,
    network: `${projectPath}/global/networks/${networkName}`,
    subnetwork: `${projectPath}/regions/${region}/subnetworks/${networkName}`,
  });

  // Description of the node that the queued resource will provision.
  const tpuNode = new Node({
    name: nodeName,
    zone,
    acceleratorType: tpuType,
    runtimeVersion: tpuSoftwareVersion,
    networkConfig: tpuNetwork,
    queuedResource: `${locationPath}/queuedResources/${queuedResourceName}`,
  });

  // Submit the queued resource; the call returns a long-running operation.
  const [operation] = await tpuClient.createQueuedResource({
    parent: locationPath,
    queuedResource: new QueuedResource({
      name: queuedResourceName,
      tpu: {
        nodeSpec: [
          {
            parent: locationPath,
            node: tpuNode,
            nodeId: nodeName,
          },
        ],
      },
    }),
    queuedResourceId: queuedResourceName,
  });

  // Block until the create operation has completed.
  const [response] = await operation.promise();

  // You can wait until TPU Node is READY,
  // and check its status using getTpuVm() from `tpu_vm_get` sample.
  console.log(
    `Queued resource ${queuedResourceName} with specified network created.`
  );
  console.log(JSON.stringify(response));
}
await callCreateQueuedResourceNetwork();
// [END tpu_queued_resources_create_network]
}

// Entry point: forward the CLI arguments (everything after `node <script>`)
// to main() and report any failure through a non-zero exit code.
const cliArgs = process.argv.slice(2);
main(...cliArgs).catch(error => {
  console.error(error);
  process.exitCode = 1;
});
7 changes: 4 additions & 3 deletions tpu/queuedResources/createQueuedResourceStartupScript.js
Original file line number Diff line number Diff line change
@@ -81,8 +81,8 @@ async function main(
metadata: {
// The script updates numpy to the latest version and logs the output to a file.
'startup-script': `#!/bin/bash
echo "Hello World" > /var/log/hello.log
sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1`,
echo "Hello World" > /var/log/hello.log
sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1`,
},
});

@@ -112,13 +112,14 @@ async function main(
const [operation] = await tpuClient.createQueuedResource(request);

// Wait for the create operation to complete.
await operation.promise();
const [response] = await operation.promise();

// You can wait until TPU Node is READY,
// and check its status using getTpuVm() from `tpu_vm_get` sample.
console.log(
`Queued resource ${queuedResourceName} with start-up script created.`
);
console.log(JSON.stringify(response));
}
await callCreateQueuedResourceStartupScript();
// [END tpu_queued_resources_startup_script]
72 changes: 72 additions & 0 deletions tpu/test/createQueuedResourceNetwork.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

const path = require('path');
const assert = require('node:assert/strict');
const {after, before, describe, it} = require('mocha');
const cp = require('child_process');
const {TpuClient} = require('@google-cloud/tpu').v2alpha1;

/**
 * Runs a shell command synchronously and returns its stdout as a string.
 *
 * @param {string} cmd - Command line to execute.
 * @param {object} [options] - Extra `child_process.execSync` options such as
 *   `cwd`. Previously these were silently dropped, so the `{cwd}` passed by
 *   the tests had no effect.
 * @returns {string} The command's stdout decoded as UTF-8.
 */
const execSync = (cmd, options = {}) =>
  // `encoding` is applied last so the result is always a string.
  cp.execSync(cmd, {...options, encoding: 'utf-8'});
// Parent directory of this test file's directory. The sample script paths
// used below (`./queuedResources/...`) are relative to it, and it is passed
// as the `cwd` option when the samples are executed.
const cwd = path.join(__dirname, '..');

// System test for the `createQueuedResourceNetwork.js` sample: runs the
// sample as a child process and checks its console output.
//
// The suite callback is intentionally synchronous: Mocha does not await
// `describe` callbacks, so asynchronous setup belongs in the `before` hook.
describe('TPU queued resource with specified network', () => {
  // Random suffixes reduce the chance of name clashes between test runs.
  const queuedResourceName = `queued-resource-with-network-${Math.floor(Math.random() * 1000 + 1)}`;
  const nodeName = `node-with-network-2a2b3c${Math.floor(Math.random() * 1000 + 1)}`;
  const zone = 'us-south1-a';
  const tpuType = 'v5litepod-1';
  const tpuSoftwareVersion = 'tpu-vm-tf-2.14.1';
  let projectId;

  before(async () => {
    // Resolve the project once; it is needed to build the expected paths.
    const tpuClient = new TpuClient();
    projectId = await tpuClient.getProjectId();
  });

  after(() => {
    // Delete queued resource
    execSync(
      `node ./queuedResources/forceDeleteQueuedResource.js ${queuedResourceName} ${zone}`,
      {
        cwd,
      }
    );
  });

  it('should create queued resource with specified network', () => {
    // Network name and region mirror the values hard-coded in the sample.
    const networkConfig = {
      network: `projects/${projectId}/global/networks/compute-tpu-network`,
      subnetwork: `projects/${projectId}/regions/europe-west4/subnetworks/compute-tpu-network`,
      enableExternalIps: true,
    };

    const response = execSync(
      `node ./queuedResources/createQueuedResourceNetwork.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
      {
        cwd,
      }
    );

    // The sample prints a confirmation line followed by the JSON response.
    assert(
      response.includes(
        `Queued resource ${queuedResourceName} with specified network created.`
      )
    );
    assert(response.includes(JSON.stringify(networkConfig)));
  });
});
6 changes: 6 additions & 0 deletions tpu/test/createQueuedResourceStartupScript.test.js
Original file line number Diff line number Diff line change
@@ -42,6 +42,11 @@ describe('TPU queued resource with start-up script', async () => {
});

it('should create queued resource with start-up script', () => {
const metadata = {
'startup-script':
'#!/bin/bash\n echo "Hello World" > /var/log/hello.log\n sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1',
};

const response = execSync(
`node ./queuedResources/createQueuedResourceStartupScript.js ${nodeName} ${queuedResourceName} ${zone} ${tpuType} ${tpuSoftwareVersion}`,
{
@@ -54,5 +59,6 @@ describe('TPU queued resource with start-up script', async () => {
`Queued resource ${queuedResourceName} with start-up script created.`
)
);
assert(response.includes(JSON.stringify(metadata)));
});
});