diff --git a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-device-plugin-addon.ts b/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-device-plugin-addon.ts deleted file mode 100644 index 1ba15c5b..00000000 --- a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-device-plugin-addon.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { KubernetesManifest, ServiceAccount } from "aws-cdk-lib/aws-eks"; -import { ClusterAddOn, ClusterInfo } from '@aws-quickstart/eks-blueprints'; -import { loadExternalYaml, readYamlDocument, loadYaml } from "@aws-quickstart/eks-blueprints/dist/utils"; - -export class NeuronDevicePluginAddOn implements ClusterAddOn { - deploy(clusterInfo: ClusterInfo): void { - const sa = this.createServiceAccount(clusterInfo); - - const cluster = clusterInfo.cluster; - - const plugin = loadExternalYaml( - "https://raw.githubusercontent.com/aws-neuron/aws-neuron-sdk/master/src/k8/k8s-neuron-device-plugin.yml" - ); - - const neuronDevicePluginManifest = new KubernetesManifest(cluster.stack, "neuron-plugin-manifest", { - cluster, - manifest: plugin, - overwrite: true - }); - - neuronDevicePluginManifest.node.addDependency(sa); - - let doc = readYamlDocument(`${__dirname}/neuron-device-plugin-role.yaml`); - const roleManifest = doc.split("---").map(e => loadYaml(e)); - - const neuronDevicePluginRoleManifest = new KubernetesManifest(cluster.stack, "neuron-device-plugin-role-manifest", { - cluster, - manifest: roleManifest, - overwrite: true - }); - - neuronDevicePluginRoleManifest.node.addDependency(sa); - neuronDevicePluginManifest.node.addDependency(neuronDevicePluginRoleManifest); - } - - - protected createServiceAccount(clusterInfo: ClusterInfo): ServiceAccount { - const sa = clusterInfo.cluster.addServiceAccount('neuron-device-plugin', { - name: "neuron-device-plugin", - namespace: "kube-system" - }); - return sa; - } -} diff --git a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-device-plugin-role.yaml b/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-device-plugin-role.yaml deleted file mode 100644 index 43764ac6..00000000 --- a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-device-plugin-role.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: neuron-device-plugin-cluster-role - namespace: kube-system -rules: - - apiGroups: [""] - resources: ["namespaces"] - verbs: ["get"] - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get", "list", "watch"] - - apiGroups: [""] - resources: ["nodes/status"] - verbs: ["get", "list", "watch", "patch"] - - apiGroups: [""] - resources: ["pods"] - verbs: ["get", "list", "watch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: neuron-device-plugin-role-binding - namespace: kube-system -subjects: -- kind: ServiceAccount - name: neuron-device-plugin - namespace: kube-system -roleRef: - kind: ClusterRole - name: neuron-device-plugin-cluster-role - apiGroup: rbac.authorization.k8s.io diff --git a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-index.ts b/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-index.ts index ea7ba1de..64a350f9 100644 --- a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-index.ts +++ b/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-index.ts @@ -6,9 +6,7 @@ import * as amp from 'aws-cdk-lib/aws-aps'; import { ObservabilityBuilder } from '@aws-quickstart/eks-blueprints'; import * as eks from "aws-cdk-lib/aws-eks"; import * as ec2 from "aws-cdk-lib/aws-ec2"; -import { NeuronDevicePluginAddOn } from './neuron-device-plugin-addon'; -import { NeuronMonitorAddOn } from './neuron-monitor-addon'; - +import { NeuronDevicePluginAddOn, NeuronMonitorAddOn } from '@aws-quickstart/eks-blueprints'; interface NeuronNodeGroupProps { instanceClass: "inf1" @@ -65,8 +63,8 @@ export default class SingleNewEksNeuronOpenSourceObservabilityPattern { new blueprints.addons.FluxCDAddOn({"repositories": [fluxRepository]}), new GrafanaOperatorSecretAddon(), new blueprints.addons.VpcCniAddOn(), - new NeuronDevicePluginAddOn(), - new NeuronMonitorAddOn() + new NeuronMonitorAddOn(), + new NeuronDevicePluginAddOn() ]; ObservabilityBuilder.builder() diff --git a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-monitor-addon.ts b/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-monitor-addon.ts deleted file mode 100644 index be1fab88..00000000 --- a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-monitor-addon.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { KubernetesManifest } from "aws-cdk-lib/aws-eks"; -import { ClusterAddOn, ClusterInfo } from '@aws-quickstart/eks-blueprints'; -import { readYamlDocument, loadYaml } from "@aws-quickstart/eks-blueprints/dist/utils"; - -export class NeuronMonitorAddOn implements ClusterAddOn { - deploy(clusterInfo: ClusterInfo): void { - const cluster = clusterInfo.cluster; - - const neuronMonitorDoc = readYamlDocument(__dirname + '/neuron-monitor.yaml'); - const neuronMonitorManifest = neuronMonitorDoc.split("---").map(e => loadYaml(e)); - - new KubernetesManifest(cluster.stack, "neuron-monitor-manifest", { - cluster, - manifest: neuronMonitorManifest, - overwrite: true - }); - } -} diff --git a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-monitor.yaml b/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-monitor.yaml deleted file mode 100644 index 9cf1a1be..00000000 --- a/lib/single-new-eks-opensource-observability-pattern/neuron/neuron-monitor.yaml +++ /dev/null @@ -1,85 +0,0 @@ -kind: DaemonSet -apiVersion: apps/v1 -metadata: - name: neuron-monitor - namespace: kube-system - labels: - app: neuron-monitor - role: master -spec: - selector: - matchLabels: - app: neuron-monitor - role: master - template: - metadata: - labels: - app: neuron-monitor - role: master - spec: - containers: - - name: app - image: public.ecr.aws/h6c7e9p3/neuron/neuron-tools:latest - command: ["/bin/sh"] - args: ["-c", "neuron-monitor | neuron-monitor-prometheus.py --port 9010"] - ports: - - name: prom-node-exp - containerPort: 9010 - hostPort: 9010 - volumeMounts: - - name: dev - mountPath: /dev - securityContext: - privileged: true - - tolerations: - - key: aws.amazon.com/neuron - operator: Exists - effect: NoSchedule - - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: "node.kubernetes.io/instance-type" - operator: In - values: - - inf1.xlarge - - inf1.2xlarge - - inf1.6xlarge - - inf1.24xlarge - - inf2.xlarge - - inf2.4xlarge - - inf2.8xlarge - - inf2.24xlarge - - inf2.48xlarge - - trn1.2xlarge - - trn1.32xlarge - - trn1n.32xlarge - volumes: - - name: dev - hostPath: - path: /dev - restartPolicy: Always ---- -apiVersion: v1 -kind: Service -metadata: - annotations: - prometheus.io/scrape: 'true' - prometheus.io/app-metrics: 'true' - prometheus.io/port: '9010' - name: neuron-monitor - namespace: kube-system - labels: - app: neuron-monitor -spec: - clusterIP: None - ports: - - name: neuron-monitor - port: 9010 - protocol: TCP - selector: - app: neuron-monitor - type: ClusterIP