diff --git a/cloud-service-provider/gcp/gke/terraform/custom-values.yaml b/cloud-service-provider/gcp/gke/terraform/custom-values.yaml
new file mode 100644
index 00000000..2b001b1d
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/custom-values.yaml
@@ -0,0 +1,14 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# The gke-gcsfuse/volumes annotation asks GKE to inject the Cloud Storage FUSE
+# sidecar into each pod that mounts the GCS-backed model volume.
+tgi:
+  podAnnotations:
+    gke-gcsfuse/volumes: "true"
+tei:
+  podAnnotations:
+    gke-gcsfuse/volumes: "true"
+teirerank:
+  podAnnotations:
+    gke-gcsfuse/volumes: "true"
diff --git a/cloud-service-provider/gcp/gke/terraform/main.tf b/cloud-service-provider/gcp/gke/terraform/main.tf
new file mode 100644
index 00000000..72c84830
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/main.tf
@@ -0,0 +1,191 @@
+# Provisions a GKE cluster with the Cloud Storage FUSE CSI driver enabled, a
+# GCS bucket exposed to the workloads as a shared model volume, and the OPEA
+# ChatQnA Helm release.
+
+data "google_client_config" "default" {}
+data "google_project" "current" { project_id = var.project_id }
+
+# Both in-cluster providers authenticate against the cluster created by
+# module.gke using the access token of the active Google credentials.
+provider "kubernetes" {
+  host                   = "https://${module.gke.endpoint}"
+  token                  = data.google_client_config.default.access_token
+  cluster_ca_certificate = base64decode(module.gke.ca_certificate)
+}
+
+provider "helm" {
+  kubernetes {
+    host                   = "https://${module.gke.endpoint}"
+    token                  = data.google_client_config.default.access_token
+    cluster_ca_certificate = base64decode(module.gke.ca_certificate)
+  }
+}
+
+resource "google_compute_network" "default" {
+  name                    = "standalone"
+  auto_create_subnetworks = false
+}
+
+resource "google_compute_subnetwork" "default" {
+  name          = "example-subnetwork"
+  region        = var.region
+  ip_cidr_range = "10.0.0.0/16"
+  stack_type    = "IPV4_ONLY"
+
+  network = google_compute_network.default.id
+  # Index 0 is the services range, index 1 the pod range; module.gke relies on this order.
+  secondary_ip_range {
+    range_name    = "services-range"
+    ip_cidr_range = "192.168.0.0/24"
+  }
+
+  secondary_ip_range {
+    range_name    = "pod-ranges"
+    ip_cidr_range = "172.16.0.0/12"
+  }
+}
+
+module "gke" {
+  source                   = "terraform-google-modules/kubernetes-engine/google"
+  version                  = "34.0.0"
+  project_id               = var.project_id
+  name                     = var.cluster_name
+  region                   = var.region
+  kubernetes_version       = var.cluster_version
+  network                  = google_compute_network.default.name
+  subnetwork               = google_compute_subnetwork.default.name
+  ip_range_pods            = google_compute_subnetwork.default.secondary_ip_range[1].range_name
+  ip_range_services        = google_compute_subnetwork.default.secondary_ip_range[0].range_name
+  gcs_fuse_csi_driver      = true
+  deletion_protection      = false
+  remove_default_node_pool = true
+  node_pools               = var.cpu_pool
+
+  node_pools_oauth_scopes = {
+    all = [
+      "https://www.googleapis.com/auth/cloud-platform",
+      "https://www.googleapis.com/auth/logging.write",
+      "https://www.googleapis.com/auth/monitoring",
+      "https://www.googleapis.com/auth/service.management.readonly",
+      "https://www.googleapis.com/auth/servicecontrol",
+    ]
+  }
+}
+
+# Writes the cluster credentials into the local kubeconfig once the cluster exists.
+resource "null_resource" "kubectl" {
+  provisioner "local-exec" {
+    # --project pins the lookup to the project the cluster was created in instead
+    # of whatever project the local gcloud configuration defaults to.
+    command = "gcloud container clusters get-credentials ${var.cluster_name} --region ${var.region} --project ${var.project_id}"
+  }
+  depends_on = [module.gke]
+}
+
+resource "kubernetes_namespace" "opea_app" {
+  metadata {
+    name = var.namespace
+  }
+}
+
+resource "kubernetes_service_account" "opea_gcs_sa" {
+  metadata {
+    name      = "opea-gcs-sa"
+    namespace = var.namespace
+  }
+  depends_on = [kubernetes_namespace.opea_app]
+}
+
+# Bucket that backs the model volume; force_destroy lets `terraform destroy`
+# remove it even when it still contains objects.
+resource "google_storage_bucket" "model" {
+  name          = var.gcs_bucket_name
+  location      = var.gcs_bucket_location
+  force_destroy = true
+
+  uniform_bucket_level_access = true
+}
+
+resource "google_storage_bucket_iam_binding" "opea_gcs_sa_binding" {
+  bucket = google_storage_bucket.model.name
+  role   = "roles/storage.objectUser"
+  members = [
+    # FIXME: we can't use the SA we created due to #532
+    # "principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${kubernetes_service_account.opea_gcs_sa.metadata[0].namespace}/sa/${kubernetes_service_account.opea_gcs_sa.metadata[0].name}",
+    "principal://iam.googleapis.com/projects/${data.google_project.current.number}/locations/global/workloadIdentityPools/${data.google_project.current.project_id}.svc.id.goog/subject/ns/${kubernetes_service_account.opea_gcs_sa.metadata[0].namespace}/sa/default",
+  ]
+  depends_on = [kubernetes_service_account.opea_gcs_sa]
+}
+
+resource "kubernetes_persistent_volume_claim" "model" {
+  metadata {
+    name = "model-volume"
+    # Reference the namespace resource so the claim is never created before
+    # the namespace exists.
+    namespace = kubernetes_namespace.opea_app.metadata[0].name
+  }
+  spec {
+    storage_class_name = "dummy"
+    access_modes       = ["ReadWriteMany"]
+    resources {
+      requests = {
+        storage = "50Gi"
+      }
+    }
+    volume_name = kubernetes_persistent_volume.model.metadata[0].name
+  }
+  depends_on = [null_resource.kubectl]
+}
+
+# Statically provisioned volume backed by the model bucket through the
+# Cloud Storage FUSE CSI driver.
+resource "kubernetes_persistent_volume" "model" {
+  metadata {
+    name = "opea-model-pv"
+  }
+  spec {
+    capacity = {
+      storage = "50Gi"
+    }
+    storage_class_name = "dummy"
+    access_modes       = ["ReadWriteMany"]
+    persistent_volume_source {
+      csi {
+        driver        = "gcsfuse.csi.storage.gke.io"
+        volume_handle = google_storage_bucket.model.name
+      }
+    }
+    mount_options = ["implicit-dirs", "uid=1000", "gid=1000"]
+  }
+  depends_on = [null_resource.kubectl]
+}
+
+resource "helm_release" "app" {
+  chart      = "../../../../helm-charts/chatqna"
+  repository = "chatqna"
+  name       = "chatqna"
+  namespace  = var.namespace
+
+  values = [file("./custom-values.yaml")]
+
+  # set_sensitive keeps the Hugging Face token out of plan output.
+  set_sensitive {
+    name  = "global.HUGGINGFACEHUB_API_TOKEN"
+    value = var.hf_token
+  }
+  set {
+    name  = "serviceAccount.name"
+    value = kubernetes_service_account.opea_gcs_sa.metadata[0].name
+  }
+  set {
+    name = "global.modelUsePVC"
+    # Referencing the claim makes the release wait for it to be created.
+    value = kubernetes_persistent_volume_claim.model.metadata[0].name
+  }
+  set {
+    name  = "nginx.service.type"
+    value = "LoadBalancer"
+  }
+  timeout    = 600
+  depends_on = [null_resource.kubectl]
+}
\ No newline at end of file
diff --git a/cloud-service-provider/gcp/gke/terraform/opea-chatqna.tfvars b/cloud-service-provider/gcp/gke/terraform/opea-chatqna.tfvars
new file mode 100644
index 00000000..24919fee
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/opea-chatqna.tfvars
@@ -0,0 +1,17 @@
+# Example variable values for the OPEA ChatQnA deployment on GKE.
+# hf_token and project_id are left empty here and need real values before applying.
+hf_token     = ""
+project_id   = ""
+region       = "europe-west4"
+cluster_name = "opea"
+app_name     = "chatqna"
+namespace    = "chatqna"
+cpu_pool = [ {
+  name: "cpu-pool"
+  machine_type: "c4-standard-32"
+  autoscaling: false
+  min_count: 1
+  max_count: 5
+  disk_size_gb: 100
+  disk_type: "hyperdisk-balanced"
+} ]
\ No newline at end of file
diff --git a/cloud-service-provider/gcp/gke/terraform/terraform.tf b/cloud-service-provider/gcp/gke/terraform/terraform.tf
new file mode 100644
index 00000000..bd24835d
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/terraform.tf
@@ -0,0 +1,22 @@
+terraform {
+  required_providers {
+    google = {
+      source = "hashicorp/google"
+    }
+    kubernetes = {
+      source = "hashicorp/kubernetes"
+    }
+    # main.tf configures the Helm provider with the `kubernetes {}` / `set {}`
+    # block syntax, which the 3.x provider no longer accepts.
+    helm = {
+      source  = "hashicorp/helm"
+      version = "~> 2.0"
+    }
+    # Used by the null_resource that fetches the cluster credentials.
+    null = {
+      source = "hashicorp/null"
+    }
+  }
+  # Sensitive input variables require Terraform 0.14 or newer.
+  required_version = ">= 0.14"
+}
\ No newline at end of file
diff --git a/cloud-service-provider/gcp/gke/terraform/variables.tf b/cloud-service-provider/gcp/gke/terraform/variables.tf
new file mode 100644
index 00000000..399a54bf
--- /dev/null
+++ b/cloud-service-provider/gcp/gke/terraform/variables.tf
@@ -0,0 +1,99 @@
+variable "hf_token" {
+  description = "Hugging Face API token"
+  type        = string
+  sensitive   = true
+}
+
+variable "project_id" {
+  description = "Google Cloud PROJECT_ID"
+  type        = string
+}
+
+variable "region" {
+  description = "Google Cloud region"
+  type        = string
+  default     = "europe-west1"
+}
+
+variable "zone" {
+  description = "Google zone"
+  type        = string
+  default     = "a"
+}
+
+variable "cluster_name" {
+  description = "GKE cluster name"
+  type        = string
+  default     = null
+}
+
+variable "cluster_version" {
+  description = "GKE cluster version"
+  type        = string
+  default     = "1.31"
+}
+
+variable "namespace" {
+  description = "OPEA application namespace"
+  type        = string
+  default     = "default"
+}
+
+variable "app_name" {
+  description = "OPEA application name"
+  type        = string
+}
+
+variable "cpu_pool" {
+  description = "Node pool definitions passed to the GKE module as node_pools"
+  type        = list(map(any))
+}
+
+variable "disk_size" {
+  description = "Disk size in GiB for nodes."
+  type        = number
+  default     = 20
+}
+
+variable "capacity_type" {
+  description = "Spot or on-demand capacity type for the nodes"
+  type        = string
+  default     = "ON_DEMAND"
+}
+
+variable "min_size" {
+  description = "min size"
+  type        = number
+  default     = 1
+}
+
+variable "max_size" {
+  description = "max size"
+  type        = number
+  default     = 10
+}
+
+variable "desired_size" {
+  description = "desired size"
+  type        = number
+  default     = 1
+}
+
+variable "compute_engine_service_account" {
+  description = "SA for managing the nodes"
+  type        = string
+  default     = null
+}
+
+variable "gcs_bucket_name" {
+  description = "Bucket name for storing model data"
+  type        = string
+  default     = "opea-models"
+}
+
+variable "gcs_bucket_location" {
+  description = "Bucket location"
+  type        = string
+  default     = "EU"
+}
+