diff --git a/.devcontainer/demo/README.md b/.devcontainer/demo/README.md deleted file mode 100644 index 1221fbeffee..00000000000 --- a/.devcontainer/demo/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Run - -Run this when the script completes - -```bash -# If you are at the root of the repo -cd .devcontainer/demo -./armadactl create queue test --priority-factor 1 -./armadactl submit jobs.yaml -./armadactl watch test job-set-1 -``` - -# View Lookout - -If you want to view lookout, click the bottom right icon and select "Open in Vscode Desktop". - -Forward these ports: - -- 8089: Lookout -- 8082: Binoculars -- 10000: Lookoutv2 API -- 8080: Armada Server API - -and go to: http://localhost:8089 diff --git a/.devcontainer/demo/devcontainer.json b/.devcontainer/demo/devcontainer.json deleted file mode 100644 index 027ea964a92..00000000000 --- a/.devcontainer/demo/devcontainer.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "image": "mcr.microsoft.com/devcontainers/universal:2", - "features": { - "ghcr.io/mpriscella/features/kind:1": {} - }, - "postAttachCommand": "./start.sh", - "customizations": { - "vscode": { - "extensions": [ - "golang.go", - "ms-python.python" - ] - } - }, - // Open workspace in .devcontainer/demo folder - "workspaceFolder": "/workspaces/armada/.devcontainer/demo", - "name": "Demo" - } \ No newline at end of file diff --git a/.devcontainer/demo/jobs.yaml b/.devcontainer/demo/jobs.yaml deleted file mode 100644 index 47473cfe3c8..00000000000 --- a/.devcontainer/demo/jobs.yaml +++ /dev/null @@ -1,24 +0,0 @@ -queue: test -jobSetId: job-set-1 -jobs: - - priority: 0 - namespace: personal-anonymous - podSpec: - terminationGracePeriodSeconds: 0 - restartPolicy: Never - containers: - - name: sleep - imagePullPolicy: IfNotPresent - image: alpine:latest - command: - - sh - - -c - args: - - sleep $(( (RANDOM % 30) + 30 )) - resources: - limits: - memory: 64Mi - cpu: 150m - requests: - memory: 64Mi - cpu: 150m \ No newline at end of file diff --git 
a/.devcontainer/demo/start.sh b/.devcontainer/demo/start.sh deleted file mode 100755 index 0dbff52073f..00000000000 --- a/.devcontainer/demo/start.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -sh ../../docs/local/armadactl.sh - -cd ../../ - -export ARMADA_IMAGE=gresearchdev/armada-full-bundle-dev -export ARMADA_IMAGE_TAG=a08830b4911c6784d67d1227367f8505243fd167 - -# Install mage -go install github.com/magefile/mage@latest - -# Run the demo -mage localdev no-build - diff --git a/README.md b/README.md index 4f0ff26c5a9..a0f4c348e8c 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,13 @@ For an overview of Armada, see the following videos: The Armada project adheres to the CNCF [Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). +## Installation + +For installation instructions, easiest way is to use the Armada Operator. +For more information, see the [Armada Operator repository](https://github.com/armadaproject/armada-operator). + +Alternatively, you can install Armada manually by using the Helm charts defined in the `deployment` directory. + ## Documentation For documentation, see the following: diff --git a/deployment/armada-bundle/.helmignore b/deployment/armada-bundle/.helmignore deleted file mode 100644 index 0e8a0eb36f4..00000000000 --- a/deployment/armada-bundle/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. 
-.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/deployment/armada-bundle/Chart.lock b/deployment/armada-bundle/Chart.lock deleted file mode 100644 index f9da1510436..00000000000 --- a/deployment/armada-bundle/Chart.lock +++ /dev/null @@ -1,33 +0,0 @@ -dependencies: -- name: redis-ha - repository: https://dandydeveloper.github.io/charts - version: 4.15.0 -- name: postgresql - repository: https://charts.bitnami.com/bitnami - version: 11.1.27 -- name: armada-lookout-migration - repository: https://armadaproject.github.io/charts/ - version: v0.3.20 -- name: stan - repository: https://nats-io.github.io/k8s/helm/charts - version: 0.13.0 -- name: pulsar - repository: https://pulsar.apache.org/charts - version: 2.9.3 -- name: armada - repository: https://armadaproject.github.io/charts/ - version: v0.3.36 -- name: armada-executor - repository: https://armadaproject.github.io/charts/ - version: v0.3.36 -- name: armada-lookout - repository: https://armadaproject.github.io/charts/ - version: v0.3.36 -- name: armada-lookout-ingester - repository: https://armadaproject.github.io/charts/ - version: v0.3.36 -- name: executor-cluster-monitoring - repository: https://armadaproject.github.io/charts - version: v0.1.9 -digest: sha256:07796bf433ad7f1f17836c7032692d6dc617fee16a34a05a9050ed3671d8fe81 -generated: "2022-10-26T17:56:44.975573+02:00" diff --git a/deployment/armada-bundle/Chart.yaml b/deployment/armada-bundle/Chart.yaml deleted file mode 100644 index 6da7c06d0bf..00000000000 --- a/deployment/armada-bundle/Chart.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: v2 -description: A helm chart which bundles Armada components -name: armada-bundle -version: 0.0.1 -appVersion: 0.0.0-latest -dependencies: - - name: redis-ha - version: 4.15.0 - repository: https://dandydeveloper.github.io/charts - condition: 
dependencies.redis-ha - - name: postgresql - version: 11.1.27 - repository: https://charts.bitnami.com/bitnami - condition: dependencies.postgresql - - name: armada-lookout-migration - version: v0.3.20 - repository: https://g-research.github.io/charts - condition: dependencies.armada-lookout-migration - - name: stan - version: 0.13.0 - repository: https://nats-io.github.io/k8s/helm/charts - condition: dependencies.stan - - name: pulsar - version: 2.9.3 - repository: https://pulsar.apache.org/charts - condition: dependencies.pulsar - - name: armada - version: v0.3.36 - repository: https://armadaproject.github.io/charts/ - condition: dependencies.armada-server - - name: armada-executor - version: v0.3.36 - repository: https://armadaproject.github.io/charts/ - condition: dependencies.armada-executor - - name: armada-lookout - version: v0.3.36 - repository: https://armadaproject.github.io/charts/ - condition: dependencies.armada-lookout - - name: armada-lookout-ingester - version: v0.3.36 - repository: https://armadaproject.github.io/charts/ - condition: dependencies.armada-lookout-ingester - - name: executor-cluster-monitoring - version: v0.1.9 - repository: https://armadaproject.github.io/charts - condition: dependencies.executor-cluster-monitoring diff --git a/deployment/armada-bundle/README.md b/deployment/armada-bundle/README.md deleted file mode 100644 index 24cacdade98..00000000000 --- a/deployment/armada-bundle/README.md +++ /dev/null @@ -1,131 +0,0 @@ -# armada-bundle - -![Version: 0.0.1](https://img.shields.io/badge/Version-0.0.1-informational?style=flat-square) ![AppVersion: 0.0.0-latest](https://img.shields.io/badge/AppVersion-0.0.0--latest-informational?style=flat-square) - -A helm chart which bundles Armada components - -## Requirements - -| Repository | Name | Version | -|------------|------|---------| -| https://charts.bitnami.com/bitnami | postgresql | 11.1.27 | -| https://dandydeveloper.github.io/charts | redis-ha | 4.15.0 | -| 
https://armadaproject.github.io/charts/ | armada | v0.3.36 | -| https://armadaproject.github.io/charts/ | armada-executor | v0.3.36 | -| https://armadaproject.github.io/charts/ | armada-lookout | v0.3.36 | -| https://armadaproject.github.io/charts/ | armada-lookout-ingester | v0.3.36 | -| https://armadaproject.github.io/charts/ | armada-lookout-migration | v0.3.20 | -| https://armadaproject.github.io/charts | executor-cluster-monitoring | v0.1.9 | -| https://nats-io.github.io/k8s/helm/charts | stan | 0.13.0 | -| https://pulsar.apache.org/charts | pulsar | 2.9.3 | - -## Values - -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| armada-executor.applicationConfig.apiConnection.armadaUrl | string | `"armada.default.svc.cluster.local:50051"` | URL of Armada Server gRPC endpoint | -| armada-executor.applicationConfig.apiConnection.forceNoTls | bool | `true` | Only to be used for development purposes and in cases where Armada server does not have a certificate | -| armada-executor.applicationConfig.kubernetes.minimumPodAge | string | `"0s"` | | -| armada-executor.image.repository | string | `"gresearchdev/armada-executor"` | | -| armada-executor.image.tag | string | `"v0.3.36"` | | -| armada-executor.nodeSelector | string | `"nil"` | | -| armada-executor.prometheus.enabled | bool | `true` | Toggle whether to create ServiceMonitor for Armada Executor | -| armada-lookout-ingester.applicationConfig.postgres.connMaxLifetime | string | `"30m"` | Postgres connection max lifetime | -| armada-lookout-ingester.applicationConfig.postgres.connection.dbname | string | `"postgres"` | Postgres database | -| armada-lookout-ingester.applicationConfig.postgres.connection.host | string | `"postgresql.armada.svc.cluster.local"` | Postgres host | -| armada-lookout-ingester.applicationConfig.postgres.connection.password | string | `"psw"` | Postgres user password | -| armada-lookout-ingester.applicationConfig.postgres.connection.port | int | `5432` | Postgres 
port | -| armada-lookout-ingester.applicationConfig.postgres.connection.sslmode | string | `"disable"` | Postgres SSL mode | -| armada-lookout-ingester.applicationConfig.postgres.connection.user | string | `"postgres"` | Postgres username | -| armada-lookout-ingester.applicationConfig.postgres.maxIdleConns | int | `25` | Postgres max idle connections | -| armada-lookout-ingester.applicationConfig.postgres.maxOpenConns | int | `100` | Postgres max open connections | -| armada-lookout-ingester.applicationConfig.pulsar.URL | string | `"pulsar://pulsar-broker.armada.svc.cluster.local:6650"` | Pulsar connection string | -| armada-lookout-ingester.applicationConfig.pulsar.enabled | bool | `true` | Toggle whether to connect to Pulsar | -| armada-lookout-ingester.applicationConfig.pulsar.jobsetEventsTopic | string | `"persistent://armada/armada/events"` | | -| armada-lookout-ingester.image.repository | string | `"gresearchdev/armada-lookout-ingester-dev"` | | -| armada-lookout-ingester.image.tag | string | `"88ea8f0b8124c7dbbb44f7c7315c0fca13655f18"` | | -| armada-lookout-migration.applicationConfig.postgres.connMaxLifetime | string | `"30m"` | Postgres connection max lifetime | -| armada-lookout-migration.applicationConfig.postgres.connection.dbname | string | `"postgres"` | Postgres database | -| armada-lookout-migration.applicationConfig.postgres.connection.host | string | `"postgresql.armada.svc.cluster.local"` | Postgres host | -| armada-lookout-migration.applicationConfig.postgres.connection.password | string | `"psw"` | Postgres user password | -| armada-lookout-migration.applicationConfig.postgres.connection.port | int | `5432` | Postgres port | -| armada-lookout-migration.applicationConfig.postgres.connection.user | string | `"postgres"` | Postgres username | -| armada-lookout-migration.applicationConfig.postgres.maxIdleConns | int | `25` | Postgres max idle connections | -| armada-lookout-migration.applicationConfig.postgres.maxOpenConns | int | `100` | Postgres 
max open connections | -| armada-lookout-migration.clusterIssuer | string | `"letsencrypt-dev"` | | -| armada-lookout-migration.image.tag | string | `"v0.3.36"` | | -| armada-lookout-migration.ingressClass | string | `"nginx"` | | -| armada-lookout-migration.prometheus.enabled | bool | `true` | | -| armada-lookout.applicationConfig.disableEventProcessing | bool | `true` | Armada does not require a streaming backend anymore so this options turns off processing via streaming backend (Jetstream, SNAT) | -| armada-lookout.applicationConfig.eventQueue | string | `"ArmadaLookoutEventProcessor"` | | -| armada-lookout.applicationConfig.postgres.connection.dbname | string | `"postgres"` | Postgres database | -| armada-lookout.applicationConfig.postgres.connection.host | string | `"postgresql.armada.svc.cluster.local"` | Postgres host | -| armada-lookout.applicationConfig.postgres.connection.password | string | `"psw"` | Postgres user password | -| armada-lookout.applicationConfig.postgres.connection.port | int | `5432` | Postgres port | -| armada-lookout.applicationConfig.postgres.connection.user | string | `"postgres"` | Postgres username | -| armada-lookout.clusterIssuer | string | `"dev-ca"` | ClusterIssuer from whom a Let's Encrypt certificate will be requested | -| armada-lookout.hostnames | list | `[]` | Ingress hostnames | -| armada-lookout.image.repository | string | `"gresearchdev/armada-lookout"` | | -| armada-lookout.image.tag | string | `"v0.3.36"` | | -| armada-lookout.ingress.annotations | object | `{}` | Ingress annotations | -| armada-lookout.ingress.labels | object | `{}` | Ingress labels | -| armada-lookout.ingressClass | string | `"nginx"` | Ingress class | -| armada-lookout.prometheus.enabled | bool | `true` | Toggle whether to create a ServiceMonitor for Lookout | -| armada.applicationConfig.auth | object | `{}` | Armada auth config | -| armada.applicationConfig.eventsNats.QueueGroup | string | `"ArmadaEventRedisProcessor"` | | -| 
armada.applicationConfig.eventsNats.clusterId | string | `"armada-cluster"` | STAN cluster ID | -| armada.applicationConfig.eventsNats.servers | list | `[]` | events STAN URIs | -| armada.applicationConfig.eventsNats.subject | string | `"ArmadaTest"` | | -| armada.applicationConfig.eventsNats.timeout | string | `"10s"` | | -| armada.applicationConfig.eventsRedis.addrs | list | `[]` | events Redis cluster instance URLs | -| armada.applicationConfig.eventsRedis.masterName | string | `"mymaster"` | | -| armada.applicationConfig.eventsRedis.poolSize | int | `1000` | | -| armada.applicationConfig.grpcPort | int | `50051` | | -| armada.applicationConfig.httpPort | int | `8080` | | -| armada.applicationConfig.pulsar.URL | string | `"pulsar://pulsar-broker.armada.svc.cluster.local:6650"` | Pulsar connection string | -| armada.applicationConfig.pulsar.enabled | bool | `true` | Toggle whether to connect to Pulsar | -| armada.applicationConfig.redis.addrs | list | `[]` | master Redis cluster instance URLs | -| armada.applicationConfig.redis.masterName | string | `"mymaster"` | | -| armada.applicationConfig.scheduling.defaultJobTolerations | list | `[]` | default node tolerations for Armada jobs | -| armada.applicationConfig.scheduling.probabilityOfUsingNewScheduler | int | `1` | | -| armada.clusterIssuer | string | `"dev-ca"` | ClusterIssuer from whom a Let's Encrypt certificate will be requested | -| armada.hostnames | list | `[]` | | -| armada.image.repository | string | `"gresearchdev/armada-server"` | | -| armada.image.tag | string | `"v0.3.36"` | | -| armada.ingressClass | string | `"nginx"` | Ingress class | -| armada.nodePort | int | `30000` | | -| armada.prometheus.enabled | bool | `true` | Toggle whether to create a ServiceMonitor for Armada Server | -| dependencies.armada-executor | bool | `true` | Toggle whether to install Armada Executor | -| dependencies.armada-lookout | bool | `true` | Toggle whether to install Armada Lookout UI | -| 
dependencies.armada-lookout-ingester | bool | `true` | Toggle whether to install Armada Lookout Ingester | -| dependencies.armada-lookout-migration | bool | `true` | Toggle whether to install Armada Lookout migrations | -| dependencies.armada-server | bool | `true` | Toggle whether to install Armada Server | -| dependencies.executor-cluster-monitoring | bool | `true` | Toggle whether to install executor cluster monitoring rules (NOTE: requires Prometheus CRDs) | -| dependencies.postgresql | bool | `true` | Toggle whether to install PostgreSQL | -| dependencies.pulsar | bool | `true` | Toggle whether to install Pulsar | -| dependencies.redis-ha | bool | `true` | Toggle whether to install Redis HA cluster | -| dependencies.stan | bool | `true` | Toggle whether to install Streaming NATS | -| executor-cluster-monitoring.additionalLabels.app | string | `"prometheus-operator"` | | -| executor-cluster-monitoring.additionalLabels.release | string | `"prometheus-operator"` | | -| executor-cluster-monitoring.interval | string | `"5s"` | | -| postgresql.auth.postgresPassword | string | `"psw"` | | -| postgresql.fullnameOverride | string | `"postgresql"` | | -| pulsar.armadaInit.adminPort | int | `8080` | Pulsar admin (REST) port | -| pulsar.armadaInit.brokerHost | string | `"pulsar-broker.armada.svc.cluster.local"` | Pulsar Broker host | -| pulsar.armadaInit.enabled | bool | `false` | Toggle whether to enable the job which creates necessary Pulsar resources needed by Armada | -| pulsar.armadaInit.image.repository | string | `"apachepulsar/pulsar"` | Pulsar image which contains pulsar-admin | -| pulsar.armadaInit.image.tag | string | `"2.10.2"` | Pulsar image tag | -| pulsar.armadaInit.port | int | `6650` | Pulsar application port | -| pulsar.armadaInit.protocol | string | `"http"` | Protocol used for connecting to Pulsar Broker host (either `http` or `https`) | -| pulsar.fullnameOverride | string | `"pulsar"` | Fullname override for Pulsar release | -| 
pulsar.grafana.service.type | string | `"ClusterIP"` | Pulsar Grafana kubernetes service type | -| pulsar.initialize | bool | `true` | | -| pulsar.proxy.service.type | string | `"ClusterIP"` | Pulsar Proxy kubernetes service type | -| redis-ha.fullnameOverride | string | `"redis-ha"` | | -| redis-ha.hardAntiAffinity | bool | `false` | | -| redis-ha.persistentVolume.enabled | bool | `false` | | -| stan.nameOverride | string | `"stan"` | | -| stan.stan.clusterID | string | `"armada-cluster"` | Streaming NATS Cluster ID (set during install and saved in the persistence backend) | - ----------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0) diff --git a/deployment/armada-bundle/templates/pulsar-init-job.yaml b/deployment/armada-bundle/templates/pulsar-init-job.yaml deleted file mode 100644 index ccd2163ecf6..00000000000 --- a/deployment/armada-bundle/templates/pulsar-init-job.yaml +++ /dev/null @@ -1,43 +0,0 @@ -{{- if .Values.pulsar.armadaInit.enabled }} -apiVersion: batch/v1 -kind: Job -metadata: - name: pulsar-init -spec: - template: - spec: - initContainers: - - name: wait-for-pulsar - image: alpine:3.16 - command: - - /bin/sh - - -c - - | - echo "Waiting for Pulsar... ({{ .Values.pulsar.armadaInit.brokerHost }}:{{ .Values.pulsar.armadaInit.port }})" - - while ! nc -z {{ .Values.pulsar.armadaInit.brokerHost }} {{ .Values.pulsar.armadaInit.port }}; do - sleep 1 - done - - echo "Pulsar started!" 
- containers: - - name: init-pulsar - image: {{ .Values.pulsar.armadaInit.image.repository }}:{{ .Values.pulsar.armadaInit.image.tag }} - {{- $pulsarAdminUrl := printf "%s://%s:%s" .Values.pulsar.armadaInit.protocol .Values.pulsar.armadaInit.brokerHost (toString .Values.pulsar.armadaInit.adminPort) }} - command: - - sh - - -c - - | - echo -e "Initializing pulsar ({{ $pulsarAdminUrl }})" - bin/pulsar-admin --admin-url {{ $pulsarAdminUrl }} tenants create armada - bin/pulsar-admin --admin-url {{ $pulsarAdminUrl }} namespaces create armada/armada - bin/pulsar-admin --admin-url {{ $pulsarAdminUrl }} topics delete-partitioned-topic persistent://armada/armada/events -f || true - bin/pulsar-admin --admin-url {{ $pulsarAdminUrl }} topics create-partitioned-topic persistent://armada/armada/events -p 2 - - # Disable topic auto-creation to ensure an error is thrown on using the wrong topic - # (Pulsar automatically created the public tenant and default namespace). - bin/pulsar-admin --admin-url {{ $pulsarAdminUrl }} namespaces set-auto-topic-creation public/default --disable - bin/pulsar-admin --admin-url {{ $pulsarAdminUrl }} namespaces set-auto-topic-creation armada/armada --disable - restartPolicy: Never - backoffLimit: 0 -{{- end }} diff --git a/deployment/armada-bundle/values.yaml b/deployment/armada-bundle/values.yaml deleted file mode 100644 index 74d975288c2..00000000000 --- a/deployment/armada-bundle/values.yaml +++ /dev/null @@ -1,267 +0,0 @@ -# Default values for armada-bundle. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. 
- -dependencies: - # -- Toggle whether to install Armada Server - armada-server: true - # -- Toggle whether to install Armada Executor - armada-executor: true - # -- Toggle whether to install Armada Lookout UI - armada-lookout: true - # -- Toggle whether to install Armada Lookout migrations - armada-lookout-migration: true - # -- Toggle whether to install Armada Lookout Ingester - armada-lookout-ingester: true - # -- Toggle whether to install Redis HA cluster - redis-ha: true - # -- Toggle whether to install Streaming NATS - stan: true - # -- Toggle whether to install Pulsar - pulsar: true - # -- Toggle whether to install executor cluster monitoring rules (NOTE: requires Prometheus CRDs) - executor-cluster-monitoring: true - # -- Toggle whether to install PostgreSQL - postgresql: true - -redis-ha: - fullnameOverride: redis-ha - hardAntiAffinity: false - persistentVolume: - enabled: false - -postgresql: - fullnameOverride: postgresql - auth: - postgresPassword: psw - -stan: - stan: - # -- Streaming NATS Cluster ID (set during install and saved in the persistence backend) - clusterID: "armada-cluster" - nameOverride: stan - -armada: - image: - repository: gresearchdev/armada-server - tag: v0.3.36 - # -- Ingress class - ingressClass: nginx - # -- ClusterIssuer from whom a Let's Encrypt certificate will be requested - clusterIssuer: dev-ca - hostnames: [] - - applicationConfig: - grpcPort: 50051 - httpPort: 8080 - # -- Armada auth config - auth: {} -# Here is an example auth config which disables basic auth and allows anybody to submit jobs -# NOTE: This setup should never be used in production environments -# basicAuth: -# enableAuthentication: false -# anonymousAuth: true -# permissionGroupMapping: -# submit_any_jobs: [ "everyone" ] -# create_queue: [ "everyone" ] -# delete_queue: [ "everyone" ] -# cancel_any_jobs: [ "everyone" ] -# reprioritize_any_jobs: [ "everyone" ] -# watch_all_events: [ "everyone" ] -# execute_jobs: [ "everyone" ] - redis: - masterName: 
"mymaster" - # -- master Redis cluster instance URLs - addrs: [] -# - "redis-ha-announce-0.armada.svc.cluster.local:26379" -# - "redis-ha-announce-1.armada.svc.cluster.local:26379" -# - "redis-ha-announce-2.armada.svc.cluster.local:26379" - eventsRedis: - masterName: "mymaster" - # -- events Redis cluster instance URLs - addrs: [] -# - "redis-ha-announce-0.armada.svc.cluster.local:26379" -# - "redis-ha-announce-1.armada.svc.cluster.local:26379" -# - "redis-ha-announce-2.armada.svc.cluster.local:26379" - poolSize: 1000 - eventsNats: - timeout: 10s - # -- events STAN URIs - servers: [] -# - "nats://stan.armada.svc.cluster.local:4222" - # -- STAN cluster ID - clusterId: "armada-cluster" - subject: "ArmadaTest" - QueueGroup: "ArmadaEventRedisProcessor" - scheduling: - probabilityOfUsingNewScheduler: 1 - # -- default node tolerations for Armada jobs - defaultJobTolerations: [] -# - key: armada.io/batch -# operator: Exists -# effect: NoSchedule - pulsar: - # -- Toggle whether to connect to Pulsar - enabled: true - # -- Pulsar connection string - URL: "pulsar://pulsar-broker.armada.svc.cluster.local:6650" - prometheus: - # -- Toggle whether to create a ServiceMonitor for Armada Server - enabled: true - - nodePort: 30000 - -armada-executor: - image: - repository: gresearchdev/armada-executor - tag: v0.3.36 - nodeSelector: nil - applicationConfig: - apiConnection: - # -- URL of Armada Server gRPC endpoint - armadaUrl: armada.default.svc.cluster.local:50051 - ## Please note that this setting is insecure - ## Do not use this setting in a production environment - ## This should only be used for the quickstart and local testing - # -- Only to be used for development purposes and in cases where Armada server does not have a certificate - forceNoTls: true - kubernetes: - minimumPodAge: 0s - - prometheus: - # -- Toggle whether to create ServiceMonitor for Armada Executor - enabled: true - -executor-cluster-monitoring: - additionalLabels: - app: prometheus-operator - release: 
prometheus-operator - - interval: 5s - -armada-lookout: - image: - repository: gresearchdev/armada-lookout - tag: v0.3.36 - # -- Ingress class - ingressClass: nginx - ingress: - # -- Ingress annotations - annotations: {} - # -- Ingress labels - labels: {} - # -- ClusterIssuer from whom a Let's Encrypt certificate will be requested - clusterIssuer: dev-ca - # -- Ingress hostnames - hostnames: [] - - prometheus: - # -- Toggle whether to create a ServiceMonitor for Lookout - enabled: true - - applicationConfig: - # -- Armada does not require a streaming backend anymore so this options turns off processing via streaming backend (Jetstream, SNAT) - disableEventProcessing: true - eventQueue: "ArmadaLookoutEventProcessor" - postgres: - connection: - # -- Postgres host - host: postgresql.armada.svc.cluster.local - # -- Postgres port - port: 5432 - # -- Postgres username - user: postgres - # -- Postgres user password - password: psw - # -- Postgres database - dbname: postgres - -armada-lookout-migration: - image: - tag: v0.3.36 - ingressClass: nginx - clusterIssuer: letsencrypt-dev - - prometheus: - enabled: true - - applicationConfig: - postgres: - # -- Postgres max open connections - maxOpenConns: 100 - # -- Postgres max idle connections - maxIdleConns: 25 - # -- Postgres connection max lifetime - connMaxLifetime: 30m - connection: - # -- Postgres host - host: postgresql.armada.svc.cluster.local - # -- Postgres port - port: 5432 - # -- Postgres username - user: postgres - # -- Postgres user password - password: psw - # -- Postgres database - dbname: postgres - -armada-lookout-ingester: - image: - repository: gresearchdev/armada-lookout-ingester-dev - tag: 88ea8f0b8124c7dbbb44f7c7315c0fca13655f18 - applicationConfig: - postgres: - # -- Postgres max open connections - maxOpenConns: 100 - # -- Postgres max idle connections - maxIdleConns: 25 - # -- Postgres connection max lifetime - connMaxLifetime: 30m - connection: - # -- Postgres host - host: 
postgresql.armada.svc.cluster.local - # -- Postgres port - port: 5432 - # -- Postgres username - user: postgres - # -- Postgres user password - password: psw - # -- Postgres database - dbname: postgres - # -- Postgres SSL mode - sslmode: disable - pulsar: - # -- Toggle whether to connect to Pulsar - enabled: true - # -- Pulsar connection string - URL: "pulsar://pulsar-broker.armada.svc.cluster.local:6650" - jobsetEventsTopic: "persistent://armada/armada/events" - -pulsar: - armadaInit: - # -- Toggle whether to enable the job which creates necessary Pulsar resources needed by Armada - enabled: false - image: - # -- Pulsar image which contains pulsar-admin - repository: apachepulsar/pulsar - # -- Pulsar image tag - tag: 2.10.2 - # -- Pulsar Broker host - brokerHost: pulsar-broker.armada.svc.cluster.local - # -- Protocol used for connecting to Pulsar Broker host (either `http` or `https`) - protocol: http - # -- Pulsar admin (REST) port - adminPort: 8080 - # -- Pulsar application port - port: 6650 - proxy: - service: - # -- Pulsar Proxy kubernetes service type - type: ClusterIP - grafana: - service: - # -- Pulsar Grafana kubernetes service type - type: ClusterIP - initialize: true - # -- Fullname override for Pulsar release - fullnameOverride: pulsar diff --git a/deployment/armada-bundle/values/example.values.yaml b/deployment/armada-bundle/values/example.values.yaml deleted file mode 100644 index c96c3e8c70f..00000000000 --- a/deployment/armada-bundle/values/example.values.yaml +++ /dev/null @@ -1,273 +0,0 @@ -# Default values for armada-bundle. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. 
- -dependencies: - armada-server: true - armada-executor: true - armada-lookout: true - armada-lookout-migration: true - armada-lookout-ingester: true - redis-ha: true - stan: true - pulsar: true - executor-cluster-monitoring: true - postgresql: true - -redis-ha: - fullnameOverride: redis-ha - hardAntiAffinity: false - persistentVolume: - enabled: false - -postgresql: - fullnameOverride: postgresql - auth: - postgresPassword: psw - -stan: - stan: - # -- Streaming NATS Cluster ID (set during install and saved in the persistence backend) - clusterID: "armada-cluster" - nameOverride: stan - -kube-prometheus-stack: - alertmanager: - enabled: false - - prometheus: - prometheusSpec: - serviceMonitorSelectorNilUsesHelmValues: false - ruleSelectorNilUsesHelmValues: false - - grafana: - service: - type: NodePort - nodePort: 30001 - - prometheusOperator: - admissionWebhooks: - enabled: false - tls: - enabled: false - tlsProxy: - enabled: false - createCustomResource: false - -armada: - image: - repository: gresearchdev/armada-server - tag: v0.3.36 - # -- Ingress class - ingressClass: nginx - ingress: - labels: - # -- Ingress class as label, used usually as a workaround by external-dns as they currently do not support filtering by ingressClass field - kubernetes.io/ingress.class: nginx - # -- ClusterIssuer from whom a Let's Encrypt certificate will be requested - clusterIssuer: dev-ca - hostnames: [] - - applicationConfig: - grpcPort: 50051 - httpPort: 8080 - auth: - basicAuth: - enableAuthentication: false - anonymousAuth: true - permissionGroupMapping: - submit_any_jobs: [ "everyone" ] - create_queue: [ "everyone" ] - delete_queue: [ "everyone" ] - cancel_any_jobs: [ "everyone" ] - reprioritize_any_jobs: [ "everyone" ] - watch_all_events: [ "everyone" ] - execute_jobs: [ "everyone" ] - redis: - masterName: "mymaster" - addrs: - - "redis-ha-announce-0.armada.svc.cluster.local:26379" - - "redis-ha-announce-1.armada.svc.cluster.local:26379" - - 
"redis-ha-announce-2.armada.svc.cluster.local:26379" - eventsRedis: - masterName: "mymaster" - addrs: - - "redis-ha-announce-0.armada.svc.cluster.local:26379" - - "redis-ha-announce-1.armada.svc.cluster.local:26379" - - "redis-ha-announce-2.armada.svc.cluster.local:26379" - poolSize: 1000 - eventsNats: - timeout: 10s - servers: - - "nats://stan.armada.svc.cluster.local:4222" - clusterId: "armada-cluster" - subject: "ArmadaTest" - QueueGroup: "ArmadaEventRedisProcessor" - scheduling: - probabilityOfUsingNewScheduler: 1 - defaultJobTolerations: - - key: armada.io/batch - operator: Exists - effect: NoSchedule - pulsar: - # -- Toggle whether to connect to Pulsar - enabled: true - # -- Pulsar connection string - URL: "pulsar://pulsar-broker.armada.svc.cluster.local:6650" - prometheus: - # -- Toggle whether to create a ServiceMonitor for Armada Server - enabled: true - - nodePort: 30000 - -armada-executor: - image: - repository: gresearchdev/armada-executor - tag: v0.3.36 - nodeSelector: nil - applicationConfig: - apiConnection: - # -- URL of Armada Server gRPC endpoint - armadaUrl: armada.default.svc.cluster.local:50051 - ## Please note that this setting is insecure - ## Do not use this setting in a production environment - ## This should only be used for the quickstart and local testing - # -- Only to be used for development purposes and in cases where Armada server does not have a certificate - forceNoTls: true - kubernetes: - minimumPodAge: 0s - - prometheus: - # -- Toggle whether to create ServiceMonitor for Armada Executor - enabled: true - -executor-cluster-monitoring: - additionalLabels: - app: prometheus-operator - release: prometheus-operator - - interval: 5s - -armada-lookout: - image: - repository: gresearchdev/armada-lookout - tag: v0.3.36 - # -- Ingress class - ingressClass: nginx - ingress: - annotations: {} - labels: {} - # -- ClusterIssuer from whom a Let's Encrypt certificate will be requested - clusterIssuer: dev-ca - hostnames: [] - - prometheus: - # 
-- Toggle whether to create a ServiceMonitor for Lookout - enabled: true - - applicationConfig: - # -- Armada does not require a streaming backend anymore so this options turns off processing via streaming backend (Jetstream, SNAT) - disableEventProcessing: true - eventQueue: "ArmadaLookoutEventProcessor" - postgres: - connection: - # -- Postgres host - host: postgresql.armada.svc.cluster.local - # -- Postgres port - port: 5432 - # -- Postgres username - user: postgres - # -- Postgres user password - password: psw - # -- Postgres database - dbname: postgres - -armada-lookout-migration: - image: - tag: v0.3.36 - ingressClass: nginx - clusterIssuer: letsencrypt-dev - - prometheus: - enabled: true - - applicationConfig: - postgres: - # -- Postgres max open connections - maxOpenConns: 100 - # -- Postgres max idle connections - maxIdleConns: 25 - # -- Postgres connection max lifetime - connMaxLifetime: 30m - connection: - # -- Postgres host - host: postgresql.armada.svc.cluster.local - # -- Postgres port - port: 5432 - # -- Postgres username - user: postgres - # -- Postgres user password - password: psw - # -- Postgres database - dbname: postgres - -armada-lookout-ingester: - image: - repository: gresearchdev/armada-lookout-ingester-dev - tag: 88ea8f0b8124c7dbbb44f7c7315c0fca13655f18 - applicationConfig: - postgres: - # -- Postgres max open connections - maxOpenConns: 100 - # -- Postgres max idle connections - maxIdleConns: 25 - # -- Postgres connection max lifetime - connMaxLifetime: 30m - connection: - # -- Postgres host - host: postgresql.armada.svc.cluster.local - # -- Postgres port - port: 5432 - # -- Postgres username - user: postgres - # -- Postgres user password - password: psw - # -- Postgres database - dbname: postgres - # -- Postgres SSL mode - sslmode: disable - pulsar: - # -- Toggle whether to connect to Pulsar - enabled: true - # -- Pulsar connection string - URL: "pulsar://pulsar-broker.armada.svc.cluster.local:6650" - jobsetEventsTopic: 
"persistent://armada/armada/events" - -pulsar: - armadaInit: - # -- Toggle whether to enable the job which creates necessary Pulsar resources needed by Armada - enabled: false - image: - # -- Pulsar image which contains pulsar-admin - repository: apachepulsar/pulsar - # -- Pulsar image tag - tag: 2.10.2 - # -- Pulsar Broker host - brokerHost: pulsar-broker.armada.svc.cluster.local - # -- Protocol used for connecting to Pulsar Broker host (either `http` or `https`) - protocol: http - # -- Pulsar admin (REST) port - adminPort: 8080 - # -- Pulsar application port - port: 6650 - proxy: - service: - # -- Pulsar Proxy kubernetes service type - type: ClusterIP - grafana: - service: - # -- Pulsar Grafana kubernetes service type - type: ClusterIP - initialize: true - # -- Fullname override for Pulsar release - fullnameOverride: pulsar diff --git a/docs/demo.md b/docs/demo.md deleted file mode 100644 index 7b042944695..00000000000 --- a/docs/demo.md +++ /dev/null @@ -1,181 +0,0 @@ -# Armada Demo - -This guide will show you how to take a quick test drive of an Armada -instance already deployed to AWS EKS. - -## EKS - -The Armada UI (lookout) can be found at this URL: - -- [https://ui.demo.armadaproject.io](https://ui.demo.armadaproject.io) - -## Local prerequisites - -- Git -- Go 1.21 - -## Obtain the armada source -Clone [this](https://github.com/armadaproject/armada) repository: - -```bash -git clone https://github.com/armadaproject/armada.git -cd armada -``` -All commands are intended to be run from the root of the repository. - -## Armadactl configuration -Armadactl config files are structured as follows: -```yaml -currentContext: main # Default context to be used by Armadactl -contexts: - main: - armadaUrl: - execAuth: - cmd: - args: - - - test: - armadaUrl: - execAuth: - cmd: - args: - - -``` - -By default, armadactl assumes that a configuration file exists at `$HOME/.armadactl.yaml`. 
You can provide your own -config file by specifying `--config $CONFIG_FILE_PATH` when running armadactl. - -We also support a legacy armadactl config structure, although this will soon be deprecated: -```yaml -armadaUrl: -execAuth: - cmd: - args: - - -``` - -Under both structures, BasicAuth and various oidc auth methods are also supported. -See ApiConnectionDetails under pkg/client/connection.go for all supported auth methods. - -### Using contexts -It's quite common for Armada users to interact with multiple Armada instances, which we refer to as _contexts_. We have various -armadactl commands that allow users to get, set and view contexts. -```bash -armadactl config use-context test # Sets the context for future armadactl calls to "test" -armadactl config get-contexts # Gets all contexts defined in the current config -armadactl config current-context # Returns the current context -``` -Contexts are not supported with the legacy armadactl configuration. - -## Setup an easy-to-use alias -If you are on a Windows System, use a linux-supported terminal to run this command, for example [Git Bash](https://git-scm.com/downloads) or [Hyper](https://hyper.is/) -```bash -alias armadactl='go run cmd/armadactl/main.go' -``` - -## Create queues and jobs -Create queues, submit some jobs, and monitor progress: - -### Queue Creation -Use a unique name for the queue. Make sure you remember it for the next steps. -```bash -armadactl create queue $QUEUE_NAME --priority-factor 1 -armadactl create queue $QUEUE_NAME --priority-factor 2 -``` - -For queues created in this way, user and group owners of the queue have permissions to: -- submit jobs -- cancel jobs -- reprioritize jobs -- watch queue - -For more control, queues can be created via `armadactl create`, which allows for setting specific permission; see the following example. 
- -```bash -armadactl create -f ./docs/quickstart/queue-a.yaml -armadactl create -f ./docs/quickstart/queue-b.yaml -``` - -Make sure to manually edit both of these `yaml` files using a code or text editor before running the commands above. - -``` -name: $QUEUE_NAME -``` - -### Job Submission -``` -armadactl submit ./docs/quickstart/job-queue-a.yaml -armadactl submit ./docs/quickstart/job-queue-b.yaml -``` - -Make sure to manually edit both of these `yaml` files using a code or text editor before running the commands above. -``` -queue: $QUEUE_NAME -``` - -### Monitor Job Progress - -```bash -armadactl watch $QUEUE_NAME job-set-1 -``` -```bash -armadactl watch $QUEUE_NAME job-set-1 -``` - -Try submitting lots of jobs and see queues get built and processed: - -#### Windows (using Git Bash): - -Use a text editor of your choice. -Copy and paste the following lines into the text editor: -``` -#!/bin/bash - -for i in {1..50} -do - armadactl submit ./docs/quickstart/job-queue-a.yaml - armadactl submit ./docs/quickstart/job-queue-b.yaml -done -``` -Save the file with a ".sh" extension (e.g., myscript.sh) in the root directory of the project. -Open Git Bash, navigate to the project's directory using the 'cd' command, and then run the script by typing ./myscript.sh and pressing Enter. - -#### Linux: - -Open a text editor (e.g., Nano or Vim) in the terminal and create a new file by running: nano myscript.sh (replace "nano" with your preferred text editor if needed). -Copy and paste the script content from above into the text editor. -Save the file and exit the text editor. -Make the script file executable by running: chmod +x myscript.sh. -Run the script by typing ./myscript.sh in the terminal and pressing Enter. - -#### macOS: - -Follow the same steps as for Linux, as macOS uses the Bash shell by default. -With this approach, you create a shell script file that contains your multi-line script, and you can run it as a whole by executing the script file in the terminal. 
- -## Observing job progress - -CLI: - -```bash -$ armadactl watch queue-a job-set-1 -Watching job set job-set-1 -Nov 4 11:43:36 | Queued: 0, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobSubmittedEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:36 | Queued: 1, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobQueuedEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:36 | Queued: 1, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobSubmittedEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:36 | Queued: 2, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobQueuedEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:38 | Queued: 1, Leased: 1, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobLeasedEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:38 | Queued: 0, Leased: 2, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobLeasedEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:38 | Queued: 0, Leased: 1, Pending: 1, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobPendingEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:38 | Queued: 0, Leased: 0, Pending: 2, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobPendingEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:41 | Queued: 0, Leased: 0, Pending: 1, Running: 1, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobRunningEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:41 | Queued: 0, Leased: 0, Pending: 0, Running: 2, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobRunningEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:44:17 | Queued: 0, Leased: 0, Pending: 0, Running: 1, Succeeded: 1, Failed: 0, Cancelled: 0 | event: *api.JobSucceededEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:44:26 | Queued: 0, Leased: 
0, Pending: 0, Running: 0, Succeeded: 2, Failed: 0, Cancelled: 0 | event: *api.JobSucceededEvent, job id: 01drv3mey2mzmayf50631tzp9m -``` - -Web UI: - -Open [https://ui.demo.armadaproject.io](https://ui.demo.armadaproject.io) in your browser. - -![Lookout UI](./quickstart/img/lookout.png "Lookout UI") diff --git a/docs/developer/aws-ec2.md b/docs/developer/aws-ec2.md deleted file mode 100644 index 66ffdbe468e..00000000000 --- a/docs/developer/aws-ec2.md +++ /dev/null @@ -1,236 +0,0 @@ -# EC2 Developer Setup - -## Background - -For development, you might want to set up an Amazon EC2 instance as the resource requirements for Armada are substantial. A typical Armada installation requires a system with at least 16GB of memory to perform well. Running Armada on a laptop made before ~2017 will typically eat battery life and result in a slower UI. - -Note: As of June 2022, not all Armada dependencies reliably build on a Mac M1 using standard package management. So if you have an M1 Mac, working on EC2 or another external server is your best bet. - -## Instructions - -- We suggest a t3.xlarge instance from aws ec2 with AmazonLinux as the OS. 16 GB of memory is suggested. -- During selection of instance, Add a large volume to your ec2 instance. 100 gb of storage is recommended. -- When selecting the instance, you will have the opportunity to choose a security group. You may need to make a new one. Be sure to add a rule allowing inbound communication on port 22 so that you can access your server via SSH. We recommend that you restrict access to the IP address from which you access the Internet, or a small CIDR block containing it. - -If you want to use your browser to access Armada Lookout UI or other web-based interfaces, you will also need to grant access to their respective ports. For added security, consider using an [SSH tunnel](https://www.ssh.com/academy/ssh/tunneling/example) from your local machine to your development server instead of opening those ports. 
You can add LocalForward to your ssh config: `LocalForward 4000 localhost:3000` - -- ### Install [Docker](https://www.cyberciti.biz/faq/how-to-install-docker-on-amazon-linux-2/) - -The procedure to install Docker on AMI 2 (Amazon Linux 2) running on either EC2 or Lightsail instance is as follows: - -1. Login into remote AWS server using the ssh command: - -``` -ssh ec2-user@ec2-ip-address-dns-name-here -``` - -2. Apply pending updates using the yum command: - -``` -sudo yum update -``` - -3. Search for Docker package: - -``` -sudo yum search docker -``` - -4. Get version information: - -``` -sudo yum info docker -``` -

- -

- -5. Install docker, run: - -``` -sudo yum install docker -``` - -6. Add group membership for the default ec2-user so you can run all docker commands without using the sudo command: - -``` -sudo usermod -a -G docker ec2-user -id ec2-user -# Reload a Linux user's group assignments to docker w/o logout -newgrp docker -``` - - -- ### Install [docker-compose](https://www.cyberciti.biz/faq/how-to-install-docker-on-amazon-linux-2/) - -```bash -$ cd $HOME/.docker -$ mkdir cli-plugins -$ cd cli-plugins -$ curl -SL https://github.com/docker/compose/releases/download/v2.17.3/docker-compose-linux-x86_64 -o docker-compose -$ chmod 755 docker-compose -``` - -Then verify it with: - -```bash -docker-compose version -``` - -- ### Getting the [Docker Compose Plugin](https://docs.docker.com/compose/install/linux/#install-the-plugin-manually) - -Armadas setup assumes You have the docker compose plugin installed. If you do not have it installed, you can use the following guide: - -* https://docs.docker.com/compose/install/linux/#install-the-plugin-manually - -Then test it with: - -```bash -docker compose version -``` - - -- ### Install [Go](https://go.dev/doc/install) - -ssh into your EC2 instance, become root and download the go package from [golang.org](https://go.dev/doc/install). - -1. Extract the archive you downloaded into /usr/local, creating a Go tree in /usr/local/go with the following command: - -``` -rm -rf /usr/local/go && tar -C /usr/local -xzf go1.21.1.linux-amd64.tar.gz -``` - -2. Configure .bashrc - -Switch back to ec2-user and add the following line to your ~/.bashrc file - -``` -export PATH=$PATH:/usr/local/go/bin -``` - -3. Configure go Environment - -Add the following lines to your ~/.bashrc file as well, also create a golang folder under /home/ec2-user. 
- -``` -# Go envs -export GOVERSION=go1.21.1 -export GO_INSTALL_DIR=/usr/local/go -export GOROOT=$GO_INSTALL_DIR -export GOPATH=/home/ec2-user/golang -export PATH=$GOROOT/bin:$GOPATH/bin:$PATH -export GO111MODULE="on" -export GOSUMDB=off -``` - -4. Test go - -Verify that you’ve installed Go by opening a command prompt and typing the following command: - -``` -go version -go version go1.21.1 linux/amd64 -``` - -- ### Install [Kind](https://dev.to/rajitpaul_savesoil/setup-kind-kubernetes-in-docker-on-linux-3kbd) - -1. Install Kind - -``` -go install sigs.k8s.io/kind@v0.11.1 -# You can replace v0.11.1 with the latest stable kind version -``` - -2. Move the KinD Binary to /usr/local/bin - -``` -- You can find the kind binary inside the directory go/bin -- Move it to /usr/local/bin - mv go/bin/kind /usr/local/bin -- Make sure you have a path setup for /usr/local/bin -``` - -- ### Install [kubectl](https://dev.to/rajitpaul_savesoil/setup-kind-kubernetes-in-docker-on-linux-3kbd) - -1. Install Latest Version of Kubectl: - -``` -curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" -chmod +x kubectl -mv kubectl /usr/local/bin -``` - -- ### Install [helm](https://helm.sh/docs/intro/install/) - -1. Install helm: - -``` -curl -sSL https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash -``` - -2. We can verify the version - -``` -helm version --short -``` - -- ### Install [python3 (>= 3.7)](https://www.geeksforgeeks.org/how-to-install-python3-on-aws-ec2/) - -1. Check if Python is already installed or not on our AWS EC2. - -``` -python --version -``` - -

- -

- -2. At first update, Ubuntu packages by using the following command. - -``` -sudo apt update -``` - -

- -

- -3. If Python3 is not installed on your AWS EC2, then install Python3 using the following command. - -``` -sudo apt-get install python3.7 -``` - -4. We have successfully installed Python3 on AWS EC2, to check if Python3 is successfully installed or not, verify using the following command. - -``` -python3 --version -``` - -- ### Install .NET for [linux](https://docs.microsoft.com/en-us/dotnet/core/install/linux-centos) - -1. Before you install .NET, run the following commands to add the Microsoft package signing key to your list of trusted keys and add the Microsoft package repository. Open a terminal and run the following commands: - -``` -sudo rpm -Uvh https://packages.microsoft.com/config/centos/7/packages-microsoft-prod.rpm -``` - -2. Install the SDK - -``` -sudo yum install dotnet-sdk-7.0 -``` - -3. Install the runtime - -``` -sudo yum install aspnetcore-runtime-7.0 -``` - -- ### We suggest using the [remote code extension](https://code.visualstudio.com/docs/remote/ssh) for VS Code if that is your IDE of choice. - -

- -

- -- ### Please see [Our Developer Docs](../developer.md) for more information on how to get started with the codebase. diff --git a/docs/developer/etcd-localdev.md b/docs/developer/etcd-localdev.md deleted file mode 100644 index fe8c2e309a8..00000000000 --- a/docs/developer/etcd-localdev.md +++ /dev/null @@ -1,74 +0,0 @@ -# Inspect and Debugging etcd in Localdev setup - -When developing or testing Armada in the Localdev setup, it's sometimes helpful -to directly query the etcd database to gather various statistics. However, by -default, the `kind` tool (for running Kubernetes clusters inside a local Docker -side) does not expose the etcd interface for direct querying. The following -steps give a solution for querying an etcd instance running inside of `kind`. - - -First, verify the running nodes and podes. -```bash -$ kubectl get nodes -A -NAME STATUS ROLES AGE VERSION -armada-test-control-plane Ready control-plane 78m v1.24.7 -armada-test-worker Ready 77m v1.24.7 - -$ kubectl get pods -A -NAMESPACE NAME READY STATUS RESTARTS AGE -ingress-nginx ingress-nginx-admission-create-9xnpn 0/1 Completed 0 78m -ingress-nginx ingress-nginx-admission-patch-phkgm 0/1 Completed 1 78m -ingress-nginx ingress-nginx-controller-646df5f698-zbgqz 1/1 Running 0 78m -kube-system coredns-6d4b75cb6d-9z87w 1/1 Running 0 79m -kube-system coredns-6d4b75cb6d-flz4r 1/1 Running 0 79m -kube-system etcd-armada-test-control-plane 1/1 Running 0 79m -kube-system kindnet-nx952 1/1 Running 0 79m -kube-system kindnet-rtqkc 1/1 Running 0 79m -kube-system kube-apiserver-armada-test-control-plane 1/1 Running 0 79m -kube-system kube-controller-manager-armada-test-control-plane 1/1 Running 0 79m -kube-system kube-proxy-cwl2r 1/1 Running 0 79m -kube-system kube-proxy-wjqft 1/1 Running 0 79m -kube-system kube-scheduler-armada-test-control-plane 1/1 Running 0 79m -local-path-storage local-path-provisioner-6b84c5c67f-22m8m 1/1 Running 0 79m -``` -You should see an etcd control plane pod in the list of pods. 
- -Copy the etcdclient deployment YAML into the cluster control plane node: - -```bash -$ docker cp developer/config/etcdclient.yaml armada-test-control-plane:/ -``` - -Then, open a shell in the control plane node: -```bash -$ docker exec -it -u 0 --privileged armada-test-control-plane /bin/bash -``` - -In the container shell, move the deployment YAML file to the Kubernetes deployments source -directory. Kubernetes (Kind) will notice the file's appearance and will deploy -the new pod. -```bash -root@armada-test-control-plane:/# mv etcdclient.yaml /etc/kubernetes/manifests/ -root@armada-test-control-plane:/# exit -$ kubectl get pods -A -``` -You should see an etcdclient pod running. - -Open a shell in the new etcdclient utility pod, and start using `etcdctl` to query etcd. -```bash -$ kubectl exec -n kube-system -it etcdclient-armada-test-control-plane -- sh -/ # etcdctl endpoint status -w table -+-------------------------+------------------+---------+---------+-----------+-----------+------------+ -| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | -+-------------------------+------------------+---------+---------+-----------+-----------+------------+ -| https://172.19.0.2:2379 | d7380397c3ec4b90 | 3.5.3 | 3.9 MB | true | 2 | 16727 | -+-------------------------+------------------+---------+---------+-----------+-----------+------------+ -/ # -/ # exit -``` -At this point, you can use `etcdctl` to query etcd for key-value pairs, get the health and/or metrics -of the etcd server. - -## References - -https://mauilion.dev/posts/etcdclient/ diff --git a/docs/developer/manual-localdev.md b/docs/developer/manual-localdev.md deleted file mode 100644 index 24321f740d6..00000000000 --- a/docs/developer/manual-localdev.md +++ /dev/null @@ -1,75 +0,0 @@ -# Manual Local Development - -Here, we give an overview of a development setup for Armada that gives users full control over the Armada components and dependencies. 
- -Before starting, please ensure you have installed [Go](https://go.dev/doc/install) (version 1.21 or later), gcc (for Windows, see, e.g., [tdm-gcc](https://jmeubank.github.io/tdm-gcc/)), [mage](https://magefile.org/), [docker](https://docs.docker.com/get-docker/), [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl), and, if you need to compile `.proto` files, [protoc](https://github.com/protocolbuffers/protobuf/releases). - -For a full list of mage commands, run `mage -l`. - -## Setup - -### Note for Arm/M1 Mac Users - -You will need to set the `PULSAR_IMAGE` enviromental variable to an arm64 image. - -We provide an optimised image for this purpose: - -```bash -export PULSAR_IMAGE=richgross/pulsar:2.11.0 -``` - -```bash -# Download Go dependencies. -go mod tidy - -# Install necessary tooling. -mage BootstrapTools - -# Compile .pb.go files from .proto files -# (only necessary after changing a .proto file). -mage proto -mage dotnet - -# Build the Docker images containing all Armada components. -# Only the main "bundle" is needed for quickly testing Armada. -mage buildDockers "bundle,lookout-bundle,jobservice" - -# Setup up a kind (i.e., Kubernetes-in-Docker) cluster; see -# https://kind.sigs.k8s.io/ for details. -mage Kind - -# Start necessary dependencies. -# Verify that dependencies started successfully -# (check that Pulsar has fully started as it is quite slow (~ 1min )). -mage StartDependencies && mage checkForPulsarRunning - -# Start the Armada server and executor. -# Alternatively, run the Armada server and executor directly on the host, -# e.g., through your IDE; see below for details. -docker compose up -d server executor-legacy - -# Wait for Armada to come online -mage checkForArmadaRunning -``` - -Run the Armada test suite against the local environment to verify that it is working correctly. -```bash -# Create an Armada queue to submit jobs to. 
-go run cmd/armadactl/main.go create queue e2e-test-queue - -# To allow Ingress tests to pass -export ARMADA_EXECUTOR_INGRESS_URL="http://localhost" -export ARMADA_EXECUTOR_INGRESS_PORT=5001 - -# Run the Armada test suite against the local environment. -go run cmd/testsuite/main.go test --tests "testsuite/testcases/basic/*" --junit junit.xml -``` - -Tear down the local environment using the following: -```bash -# Stop Armada components and dependencies. -docker compose down - -# Tear down the kind cluster. -mage KindTeardown -``` diff --git a/docs/developer/ubuntu-setup.md b/docs/developer/ubuntu-setup.md deleted file mode 100644 index db3733c6e50..00000000000 --- a/docs/developer/ubuntu-setup.md +++ /dev/null @@ -1,173 +0,0 @@ -# Setting up an Ubuntu Linux instance for Armada development - -## Introduction - -This document is a list of the steps, packages, and tweaks that need to be done to get an Ubuntu Linux -instance running, with all the tools needed for Armada development and testing. - -The packages and steps were verified on an AWS EC2 instance (type t3.xlarge, 4 vcpu, 16GB RAM, -150GB EBS disk), but should be essentially the same on any comparable hardware system. - -### Install Ubuntu Linux - -Install Ubuntu Linux 22.04 (later versions may work as well). The default package set should -work. If you are setting up a new AWS EC2 instance, the default Ubuntu 22.04 image works well. - -When installing, ensure that the network configuration allows: -- SSH traffic from your client IP(s) -- HTTP traffic -- HTTPS traffic - -Apply all recent updates: -``` -$ sudo apt update -$ sudo apt upgrade -``` -You will likely need to reboot after applying the updates: -``` -$ sudo shutdown -r now -``` -After logging in, clean up any old, unused packages: -``` -$ sudo apt autoremove -``` - -AWS usually creates new EC2 instances with a very small root partion (8GB), which will quickly -fill up when using containers, or doing any serious development. 
Creating a new, large EBS volume, and -attaching it to the instance, will give a system usable for container work. - -First, provision an EBS volume in the AWS Console - of at least 150GB, or more - and attach it to -the instance. You will need to create the EBS volume in the same availability zone as the EC2 -instance - you can find the latter's AZ by clicking on the 'Networking' tab in the details page -for the instance, and you should see the Availabilty Zone listed in that panel. Once you've created -the volume, attach it to the instance. - -Then, format a filesystem on the volume and mount it. First, determine what block device the -parition is on, by running the `lsblk` comand. There should be a line where the TYPE is 'disk' -and the size matches the size you specified when creating the volume - e.g. -``` -nvme1n1 259:4 0 150G 0 disk -``` -Create a filesystem on that device by running `mkfs`: -``` -$ sudo mkfs -t ext4 /dev/nvme1n1 -``` -Then set a label on the partition - here, we will give it a label of 'VOL1': -``` -$ sudo e2label /dev/nvme1n1 VOL1 -``` -Create the mount-point directory: -``` -$ sudo mkdir /vol1 -``` -Add the following line to the end of `/etc/fstab`, so it will be mounted upon reboot: -``` -LABEL=VOL1 /vol1 ext4 defaults 0 2 -``` -Then mount it by doing `sudo mount -a`, and confirm the available space by running `df -h` - the `/vol1` -filesystem should be listed. - -### Install Language/Tool Packages - -Install several development packages that aren't installed by default in the base system: -``` -$ sudo apt install gcc make unzip -``` - -### Install Go, Protobuffers, and kubectl tools -Install the Go compiler and associated tools. 
Currently, the latest version is 1.21.1, but there may -be newer versions: - -``` -$ curl --location -O https://go.dev/dl/go1.21.1.linux-amd64.tar.gz -$ sudo tar -C /usr/local -xzvf go1.21.1.linux-amd64.tar.gl -$ echo 'export PATH=$PATH:/usr/local/go/bin' > go.sh -$ sudo cp go.sh /etc/profile.d/ -``` -Then, log out and back in again, then run `go version` to verify your path is now correct. - -Install protoc: -``` -$ curl -O --location https://github.com/protocolbuffers/protobuf/releases/download/v23.3/protoc-23.3-linux-x86_64.zip -$ cd /usr/local && sudo unzip ~/protoc-23.3-linux-x86_64.zip -$ cd ~ -$ type protoc -``` - -Install kubectl: -``` -$ curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" -$ sudo cp kubectl /usr/local/bin -$ sudo chmod 755 /usr/local/bin/kubectl -$ kubectl version -``` - -### Install Docker - -Warning: do not install Docker as provided by the `docker.io` and other packages in the Ubuntu base -packages repository - the version of Docker they provide is out-of-date. - -Instead, follow the instructions for installing Docker on Ubuntu at https://docs.docker.com/engine/install/ubuntu/ . -Specifically, follow the listed steps for installing using an apt repository, and install a recent version. - -*WARNING*: at the time of this writing, there is a known release of the Docker Engine that causes Armada -localdev setups to fail, due to container image loading errors. The broken release is -`5:25.0.0-1~ubuntu.22.04~jammy` (for Ubuntu 22.04) - it may be referenced as Docker Engine v25.0.0. The -most recent prior major version at this time - v24.0.7 - has been verified to work correctly for Armada -development - `5:24.0.7-1~ubuntu.22.04~jammy`. In the docs sub-section *2.0 Install the Docker pages*, click -on the *Specific version* tab for details and instructions how to install an earlier release. 
However, -it's expected that Docker will have a newer version (higher than 25.0.0) that will fix this issue; please -consult the Docker release notes. - -### Relocate Docker storage directory to secondary volume - -Since Docker can use a lot of filesystem space, the directory where it stores container images, logs, -and other datafiles should be relocated to the separate, larger non-root volume on the system, so that -the root filesystem does not fill up. - -Stop the Docker daemon(s) and copy the existing data directory to the new location: -``` -$ sudo systemctl stop docker -$ ps ax | grep -i docker # no Docker processes should be shown - -$ sudo rsync -av /var/lib/docker /vol1/ -$ sudo rm -rf /var/lib/docker -$ sudo ln -s /vol1/docker /var/lib/docker -``` -Then restart Docker and verify that it's working again: -``` -$ sudo systemctl start docker -$ sudo docker ps -$ sudo docker run hello-world -``` - -### Create user accounts, verify docker access - -First, make a home directory parent in the new larger filesystem: -``` -$ sudo mkdir /vol1/home -``` -Then, for each user to be added, run the following steps - we will be using the account named 'testuser' here. -First, create the account and their home directory. 
-``` -$ sudo adduser --shell /bin/bash --gecos 'Test User' --home /vol1/home/testuser testuser -``` -Set up their $HOME/.ssh directory and add their SSH public-key: -``` -$ sudo mkdir /vol1/home/testuser/.ssh -$ sudo vim /vol1/home/testuser/.ssh/authorized_keys -# In the editor, add the SSH public key string that the user has given you, save the file and exit -$ sudo chmod 600 /vol1/home/testuser/.ssh/authorized_keys -$ sudo chmod 700 /vol1/home/testuser/.ssh -$ sudo chown -R testuser:testuser /vol1/home/testuser/.ssh -``` -Finally, add them to the `docker` group so they can run Docker commands without `sudo` access: -``` -$ sudo gpasswd -a testuser docker -``` -**sudo Access (OPTIONAL)** - -If you want to give the new user `sudo` privileges, run the following command: -``` -$ sudo gpasswd -a testuser sudo -``` diff --git a/docs/local/armadactl.sh b/docs/local/armadactl.sh deleted file mode 100755 index a5fe56ff5cd..00000000000 --- a/docs/local/armadactl.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/sh -ex - -echo "Downloading armadactl for your platform" - -# Determine Platform -SYSTEM=$(uname | sed 's/MINGW.*/windows/' | tr A-Z a-z) -if [ "$SYSTEM" = "windows" ]; then - ARCHIVE_TYPE=zip - UNARCHIVE="zcat > armadactl.exe" -else - ARCHIVE_TYPE=tar.gz - UNARCHIVE="tar xzf -" -fi - -# Find the latest Armada version -LATEST_GH_URL=$(curl -fsSLI -o /dev/null -w %{url_effective} https://github.com/armadaproject/armada/releases/latest) - -# Hard version set required until https://github.com/armadaproject/armada/pull/2384 is released -# ARMADA_VERSION=${LATEST_GH_URL##*/} -ARMADA_VERSION="v0.3.61" -ARMADACTL_URL="https://github.com/armadaproject/armada/releases/download/$ARMADA_VERSION/armadactl-$ARMADA_VERSION-$SYSTEM-amd64.$ARCHIVE_TYPE" - -# Download and untar/unzip armadactl -if curl -sL $ARMADACTL_URL | sh -c "$UNARCHIVE" ; then - echo "armadactl downloaded successfully" - - # Move armadactl binary to a directory in user's PATH - 
TARGET_DIR="$HOME/bin" # Change this to the desired target directory in your user's home - mkdir -p "$TARGET_DIR" - cp armadactl "$TARGET_DIR/" - export PATH="$TARGET_DIR:$PATH" - - echo "armadactl is now available on your PATH" - -else - echo "Something is amiss!" - echo "Please visit:" - echo " - https://github.com/armadaproject/armada/releases/latest" - echo "to find the latest armadactl binary for your platform" -fi diff --git a/docs/local/destroy.sh b/docs/local/destroy.sh deleted file mode 100755 index 8b78862c58e..00000000000 --- a/docs/local/destroy.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -printf "\n*******************************************************\n" -printf "Destroying Armada server cluster" -printf "\n*******************************************************\n" -kind delete cluster --name quickstart-armada-server -printf "\033[1mdone\033[0m" -printf "\n*******************************************************\n" -printf "Destroying first Armada executor cluster" -printf "\n*******************************************************\n" -kind delete cluster --name quickstart-armada-executor-0 -printf "\033[1mdone\033[0m" -printf "\n*******************************************************\n" -printf "Destroying second Armada executor cluster" -printf "\n*******************************************************\n" -kind delete cluster --name quickstart-armada-executor-1 -printf "\033[1mdone\033[0m\n" \ No newline at end of file diff --git a/docs/local/setup.sh b/docs/local/setup.sh deleted file mode 100755 index 93d5aa92e46..00000000000 --- a/docs/local/setup.sh +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/sh -ex - -KIND_IMG="kindest/node:v1.21.10" -CHART_VERSION_ARMADA="v0.3.20" -CHART_VERSION_ARMADA_EXECUTOR_MONITORING="v0.1.9" -CHART_VERSION_KUBE_PROMETHEUS_STACK="13.0.0" -CHART_VERSION_NATS="0.13.0" -CHART_VERSION_POSTGRES="12.4.2" -CHART_VERSION_PULSAR="2.9.4" -CHART_VERSION_REDIS="4.22.3" - -printf 
"\n*******************************************************\n" -printf "Running script which will deploy a local Armada cluster" -printf "\n*******************************************************\n" - -##################################################### -# HELM CONFIGURATION # -##################################################### -printf "\n*******************************************************\n" -printf "Registering required helm repositories ..." -printf "\n*******************************************************\n" -helm repo add dandydev https://dandydeveloper.github.io/charts -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo add nats https://nats-io.github.io/k8s/helm/charts -helm repo add bitnami https://charts.bitnami.com/bitnami -helm repo add gresearch https://g-research.github.io/charts -helm repo add apache https://pulsar.apache.org/charts -helm repo update - -##################################################### -# ARMADA SERVER # -##################################################### -printf "\n*******************************************************\n" -printf "Deploying Armada server ..." -printf "\n*******************************************************\n" -if kind delete cluster --name quickstart-armada-server; then - printf "Deleting existing quickstart-armada-server ..." 
- -fi -kind create cluster --name quickstart-armada-server --config ./docs/quickstart/kind/kind-config-server.yaml --image $KIND_IMG - -# Set cluster as current context -kind export kubeconfig --name=quickstart-armada-server - -# Install Redis -printf "\nStarting Redis ...\n" -helm install redis dandydev/redis-ha --version $CHART_VERSION_REDIS -f docs/quickstart/helm/values-redis.yaml - -# Install nats-streaming -printf "\nStarting NATS ...\n" -helm install nats nats/stan --version $CHART_VERSION_NATS --wait - -# Install Apache Pulsar -printf "\nStarting Pulsar ...\n" -helm install pulsar apache/pulsar --version $CHART_VERSION_PULSAR -f docs/quickstart/helm/values-pulsar.yaml - -# Install Prometheus -printf "\nStarting Prometheus ...\n" -helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version $CHART_VERSION_KUBE_PROMETHEUS_STACK -f docs/quickstart/helm/values-server-prometheus.yaml - -# Install Armada server -printf "\nStarting Armada server ...\n" -helm install armada-server gresearch/armada --version $CHART_VERSION_ARMADA -f ./docs/quickstart/helm/values-server.yaml - -# Get server IP for executors -SERVER_IP=$(kubectl get nodes quickstart-armada-server-worker -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') -##################################################### - -##################################################### -# ARMADA EXECUTOR 1 # -##################################################### -printf "\n*******************************************************\n" -printf "Deploying first Armada executor cluster ..." -printf "\n*******************************************************\n" -if kind delete cluster --name quickstart-armada-executor-0; then - printf "Deleting existing quickstart-armada-executor-0 cluster ..." 
- -fi -kind create cluster --name quickstart-armada-executor-0 --config ./docs/quickstart/kind/kind-config-executor.yaml --image $KIND_IMG - -# Set cluster as current context -kind export kubeconfig --name=quickstart-armada-executor-0 - -# Install Prometheus -helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version $CHART_VERSION_KUBE_PROMETHEUS_STACK -f docs/quickstart/helm/values-executor-prometheus.yaml - -# Install executor -helm install armada-executor gresearch/armada-executor --version $CHART_VERSION_ARMADA --set applicationConfig.apiConnection.armadaUrl="$SERVER_IP:30000" -f docs/quickstart/helm/values-executor.yaml -helm install armada-executor-cluster-monitoring gresearch/executor-cluster-monitoring --version $CHART_VERSION_ARMADA_EXECUTOR_MONITORING -f docs/quickstart/helm/values-executor-cluster-monitoring.yaml - -# Get executor IP for Grafana -EXECUTOR_0_IP=$(kubectl get nodes quickstart-armada-executor-0-worker -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') -##################################################### - -##################################################### -# ARMADA EXECUTOR 2 # -##################################################### -printf "\n*******************************************************\n" -printf "Deploying second Armada executor cluster ..." -printf "\n*******************************************************\n" -if kind delete cluster --name quickstart-armada-executor-1; then - printf "Deleting existing quickstart-armada-executor-1 cluster ..." 
- -fi -kind create cluster --name quickstart-armada-executor-1 --config ./docs/quickstart/kind/kind-config-executor.yaml --image $KIND_IMG - -# Set cluster as current context -kind export kubeconfig --name=quickstart-armada-executor-1 - -# Install Prometheus -helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack --version $CHART_VERSION_KUBE_PROMETHEUS_STACK -f docs/quickstart/helm/values-executor-prometheus.yaml - -# Install executor -helm install armada-executor gresearch/armada-executor --version $CHART_VERSION_ARMADA --set applicationConfig.apiConnection.armadaUrl="$SERVER_IP:30000" -f docs/quickstart/helm/values-executor.yaml -helm install armada-executor-cluster-monitoring gresearch/executor-cluster-monitoring --version $CHART_VERSION_ARMADA_EXECUTOR_MONITORING -f docs/quickstart/helm/values-executor-cluster-monitoring.yaml - -# Get executor IP for Grafana -EXECUTOR_1_IP=$(kubectl get nodes quickstart-armada-executor-1-worker -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') -##################################################### - -##################################################### -# ARMADA LOOKOUT # -##################################################### -printf "\n*******************************************************\n" -printf "Deploying Armada Lookout UI ..." 
-printf "\n*******************************************************\n" -kind export kubeconfig --name=quickstart-armada-server - -# Install postgres -helm install postgres bitnami/postgresql --version $CHART_VERSION_POSTGRES --wait --set auth.postgresPassword=psw - -# Run database migration -helm install lookout-migration gresearch/armada-lookout-migration --version $CHART_VERSION_ARMADA --wait -f docs/quickstart/helm/values-lookout.yaml - -# Install Armada Lookout -helm install lookout gresearch/armada-lookout --version $CHART_VERSION_ARMADA -f docs/quickstart/helm/values-lookout.yaml -##################################################### - -##################################################### -# GRAFANA CONFIG # -##################################################### -printf "\n*******************************************************\n" -printf "Configuring Grafana dashboard for Armada ..." -printf "\n*******************************************************\n" -curl -X POST -i http://admin:prom-operator@localhost:30001/api/datasources -H "Content-Type: application/json" -d '{"name":"cluster-0","type":"prometheus","url":"http://'$EXECUTOR_0_IP':30001","access":"proxy","basicAuth":false}' -curl -X POST -i http://admin:prom-operator@localhost:30001/api/datasources -H "Content-Type: application/json" -d '{"name":"cluster-1","type":"prometheus","url":"http://'$EXECUTOR_1_IP':30001","access":"proxy","basicAuth":false}' -curl -X POST -i http://admin:prom-operator@localhost:30001/api/dashboards/import --data-binary @./docs/quickstart/grafana-armada-dashboard.json -H "Content-Type: application/json" - -printf "\n*******************************************************\n" -printf "Finished deploying local Armada cluster" -printf "\n*******************************************************\n" - -bs="\033[1m" -be="\033[0m" -printf "\nArmada Lookout UI can be accessed by doing the following:" -printf "\n\t* type %bkubectl port-forward svc/armada-lookout 8080:8080%b in your 
terminal" "$bs" "$be" -printf "\n\t* open %bhttp://localhost:8080%b in your browser\n" "$bs" "$be" - -printf "\nArmada uses Grafana for monitoring, do the following in order to access it:" -printf "\n\t* type %bkubectl port-forward svc/kube-prometheus-stack-grafana 8081:80%b in your terminal" "$bs" "$be" -printf "\n\t* open %bhttp://localhost:8081%b in your browser" "$bs" "$be" -printf "\n\t* use %badmin:prom-operator%b as default admin credentials for login" "$bs" "$be" -printf "\n\t* open the %bArmada - Overview%b dashboard\n" "$bs" "$be" - -./docs/local/armadactl.sh diff --git a/docs/quickstart/grafana-armada-dashboard.json b/docs/quickstart/grafana-armada-dashboard.json deleted file mode 100644 index e718a825c04..00000000000 --- a/docs/quickstart/grafana-armada-dashboard.json +++ /dev/null @@ -1,1156 +0,0 @@ -{ - "dashboard": { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1572813374561, - "links": [], - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 6, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 0 - }, - "id": 5, - "interval": "1s", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "armada:queue:size", - "legendFormat": "{{ queueName }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - 
"timeRegions": [], - "timeShift": null, - "title": "Queued jobs", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "decimals": 3, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 0 - }, - "id": 13, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "armada:queue:priority", - "legendFormat": "{{ queueName }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Queue priority", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": 
{}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "decimals": 10, - "fill": 7, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 44, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": true, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(armada:queue:size > bool 1) * (100 / armada:queue:priority) / scalar(sum((armada:queue:size > bool 1) * (1 / armada:queue:priority)))", - "legendFormat": "{{ queueName }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Queue share", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": null, - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 35, - "panels": [], - "title": "CPU", - "type": "row" - }, - { - "aliasColors": { - "Failed": "semi-dark-red", - "Pending": "light-blue", - "Running": "light-green", - "Succeeded": "light-yellow", - "Unknown": "dark-purple" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$cluster", - "decimals": null, - "description": "", - "fill": 6, 
- "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 7, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "repeat": "cluster", - "repeatDirection": "h", - "scopedVars": { - "cluster": { - "selected": false, - "text": "cluster-0", - "value": "cluster-0" - } - }, - "seriesOverrides": [ - { - "alias": "Available CPU", - "color": "#73BF69", - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Used CPU", - "color": "#FADE2A", - "fill": 0, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "armada:monitoring:armada_pod_cpu_request:sum{phase=\"Running\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ label_armada_queue_id }}", - "refId": "A" - }, - { - "expr": " node:worker_node_allocatable_cpu_cores:sum", - "instant": false, - "legendFormat": "Available CPU", - "refId": "B" - }, - { - "expr": "container:worker_node_cpu_usage_seconds_total:sum", - "legendFormat": "Used CPU", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$cluster - Running pods cpu request", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 
null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Failed": "semi-dark-red", - "Pending": "light-blue", - "Running": "light-green", - "Succeeded": "light-yellow", - "Unknown": "dark-purple" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$cluster", - "decimals": null, - "description": "", - "fill": 6, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "id": 45, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": "h", - "repeatIteration": 1572813374561, - "repeatPanelId": 7, - "scopedVars": { - "cluster": { - "selected": false, - "text": "cluster-1", - "value": "cluster-1" - } - }, - "seriesOverrides": [ - { - "alias": "Available CPU", - "color": "#73BF69", - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Used CPU", - "color": "#FADE2A", - "fill": 0, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "armada:monitoring:armada_pod_cpu_request:sum{phase=\"Running\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ label_armada_queue_id }}", - "refId": "A" - }, - { - "expr": " node:worker_node_allocatable_cpu_cores:sum", - "instant": false, - "legendFormat": "Available CPU", - "refId": "B" - }, - { - "expr": "container:worker_node_cpu_usage_seconds_total:sum", - "legendFormat": "Used CPU", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$cluster - Running pods 
cpu request", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 29, - "panels": [], - "title": "Memory", - "type": "row" - }, - { - "aliasColors": { - "Failed": "semi-dark-red", - "Pending": "light-blue", - "Running": "light-green", - "Succeeded": "light-yellow", - "Unknown": "dark-purple" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$cluster", - "decimals": null, - "description": "", - "fill": 5, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 18 - }, - "id": 17, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "repeat": "cluster", - "repeatDirection": "h", - "scopedVars": { - "cluster": { - "selected": false, - "text": "cluster-0", - "value": "cluster-0" - } - }, - "seriesOverrides": [ - { - "alias": "Available Memory", - "color": "#73BF69", - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Memory used", - "color": "#FADE2A", - "fill": 0, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - 
"targets": [ - { - "expr": "armada:monitoring:armada_pod_memory_request:sum{phase=\"Running\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ label_armada_queue_id }}", - "refId": "A" - }, - { - "expr": " node:worker_node_allocatable_memory_bytes:sum", - "instant": false, - "legendFormat": "Available Memory", - "refId": "B" - }, - { - "expr": "container:worker_node_memory_usage_bytes:sum", - "legendFormat": "Memory used", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$cluster - Running pods memory request by queue", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Failed": "semi-dark-red", - "Pending": "light-blue", - "Running": "light-green", - "Succeeded": "light-yellow", - "Unknown": "dark-purple" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$cluster", - "decimals": null, - "description": "", - "fill": 5, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 18 - }, - "id": 46, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": "h", - 
"repeatIteration": 1572813374561, - "repeatPanelId": 17, - "scopedVars": { - "cluster": { - "selected": false, - "text": "cluster-1", - "value": "cluster-1" - } - }, - "seriesOverrides": [ - { - "alias": "Available Memory", - "color": "#73BF69", - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Memory used", - "color": "#FADE2A", - "fill": 0, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "armada:monitoring:armada_pod_memory_request:sum{phase=\"Running\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ label_armada_queue_id }}", - "refId": "A" - }, - { - "expr": " node:worker_node_allocatable_memory_bytes:sum", - "instant": false, - "legendFormat": "Available Memory", - "refId": "B" - }, - { - "expr": "container:worker_node_memory_usage_bytes:sum", - "legendFormat": "Memory used", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$cluster - Running pods memory request by queue", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 23, - "panels": [], - "title": "Pods", - "type": "row" - }, - { - "aliasColors": { - "Failed": "semi-dark-red", - "Pending": "light-blue", - "Running": "light-green", - "Succeeded": "light-yellow", - "Unknown": "dark-purple" - }, - "bars": false, - "dashLength": 10, - 
"dashes": false, - "datasource": "$cluster", - "decimals": null, - "description": "", - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 27 - }, - "id": 37, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "repeat": "cluster", - "repeatDirection": "h", - "scopedVars": { - "cluster": { - "selected": false, - "text": "cluster-0", - "value": "cluster-0" - } - }, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "namespace:pods_per_phase:sum{namespace=\"default\"} ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ phase }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$cluster - Pods by phase", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Failed": "semi-dark-red", - "Pending": "light-blue", - "Running": "light-green", - "Succeeded": "light-yellow", - "Unknown": "dark-purple" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$cluster", - "decimals": null, - "description": "", - "fill": 0, - "fillGradient": 0, 
- "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 27 - }, - "id": 47, - "interval": "3s", - "legend": { - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatDirection": "h", - "repeatIteration": 1572813374561, - "repeatPanelId": 37, - "scopedVars": { - "cluster": { - "selected": false, - "text": "cluster-1", - "value": "cluster-1" - } - }, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "namespace:pods_per_phase:sum{namespace=\"default\"} ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ phase }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$cluster - Pods by phase", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "5s", - "schemaVersion": 19, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "tags": [], - "text": "All", - "value": [ - "$__all" - ] - }, - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "cluster", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "cluster-.*", - "skipUrlSync": false, - 
"type": "datasource" - } - ] - }, - "time": { - "from": "now-5m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] - }, - "timezone": "", - "title": "Armada - Overview", - "uid": "jzf8f6cZz", - "version": 1 - } -} diff --git a/docs/quickstart/helm/values-executor-cluster-monitoring.yaml b/docs/quickstart/helm/values-executor-cluster-monitoring.yaml deleted file mode 100644 index f9bf6d654b4..00000000000 --- a/docs/quickstart/helm/values-executor-cluster-monitoring.yaml +++ /dev/null @@ -1,5 +0,0 @@ -additionalLabels: - app: prometheus-operator - release: prometheus-operator - -interval: 5s \ No newline at end of file diff --git a/docs/quickstart/helm/values-executor-prometheus.yaml b/docs/quickstart/helm/values-executor-prometheus.yaml deleted file mode 100644 index a8d4fbd5200..00000000000 --- a/docs/quickstart/helm/values-executor-prometheus.yaml +++ /dev/null @@ -1,22 +0,0 @@ -alertmanager: - enabled: false - -grafana: - enabled: false - -prometheus: - prometheusSpec: - serviceMonitorSelectorNilUsesHelmValues: false - ruleSelectorNilUsesHelmValues: false - service: - type: NodePort - nodePort: 30001 - -prometheusOperator: - admissionWebhooks: - enabled: false - tls: - enabled: false - tlsProxy: - enabled: false - createCustomResource: false diff --git a/docs/quickstart/helm/values-executor.yaml b/docs/quickstart/helm/values-executor.yaml deleted file mode 100644 index ca5e67b297e..00000000000 --- a/docs/quickstart/helm/values-executor.yaml +++ /dev/null @@ -1,11 +0,0 @@ -applicationConfig: - apiConnection: - ## Please note that this setting is insecure - ## Do not use this setting in a production environment - ## This should only be used for the quickstart and local testing - forceNoTls: true - kubernetes: - minimumPodAge: 0s - -prometheus: - enabled: true diff --git a/docs/quickstart/helm/values-pulsar.yaml b/docs/quickstart/helm/values-pulsar.yaml deleted file 
mode 100644 index a2aabc2e6a2..00000000000 --- a/docs/quickstart/helm/values-pulsar.yaml +++ /dev/null @@ -1,54 +0,0 @@ -## deployed with emptyDir -volumes: - persistence: false - -# disabled AntiAffinity -affinity: - anti_affinity: false - -# disable auto recovery -components: - autorecovery: false - -zookeeper: - replicaCount: 1 - -bookkeeper: - replicaCount: 1 - service: - spec: - publishNotReadyAddresses: true - -broker: - replicaCount: 1 - configData: - ## Enable `autoSkipNonRecoverableData` since bookkeeper is running - ## without persistence - autoSkipNonRecoverableData: "true" - # storage settings - managedLedgerDefaultEnsembleSize: "1" - managedLedgerDefaultWriteQuorum: "1" - managedLedgerDefaultAckQuorum: "1" - -proxy: - replicaCount: 1 - -grafana: - ingress: - enabled: true - resources: - requests: - memory: 512Mi - -images: - grafana: - # repository: apachepulsar/pulsar-grafana - #tag: 2.9.2 - #pullPolicy: IfNotPresent - repository: streamnative/apache-pulsar-grafana-dashboard-k8s - tag: 0.0.15 - pullPolicy: IfNotPresent - -extraInitCommand: > - until bin/pulsar-admin --admin-url http://pulsar-broker.default.svc.cluster.local:8080/ tenants list; do echo Waiting for broker... 
&& sleep 1; done; - (bin/pulsar-admin --admin-url http://pulsar-broker.default.svc.cluster.local:8080/ tenants create armada && bin/pulsar-admin --admin-url http://pulsar-broker.default.svc.cluster.local:8080/ namespaces create armada/armada && bin/pulsar-admin --admin-url http://pulsar-broker.default.svc.cluster.local:8080/ topics create-partitioned-topic persistent://armada/armada/events -p 2) || true; diff --git a/docs/quickstart/helm/values-redis.yaml b/docs/quickstart/helm/values-redis.yaml deleted file mode 100644 index 3f34c86fbe4..00000000000 --- a/docs/quickstart/helm/values-redis.yaml +++ /dev/null @@ -1,3 +0,0 @@ -hardAntiAffinity: false -persistentVolume: - enabled: false diff --git a/docs/quickstart/helm/values-server-prometheus.yaml b/docs/quickstart/helm/values-server-prometheus.yaml deleted file mode 100644 index 4f774390cee..00000000000 --- a/docs/quickstart/helm/values-server-prometheus.yaml +++ /dev/null @@ -1,20 +0,0 @@ -alertmanager: - enabled: false - -prometheus: - prometheusSpec: - serviceMonitorSelectorNilUsesHelmValues: false - ruleSelectorNilUsesHelmValues: false -grafana: - service: - type: NodePort - nodePort: 30001 - -prometheusOperator: - admissionWebhooks: - enabled: false - tls: - enabled: false - tlsProxy: - enabled: false - createCustomResource: false diff --git a/docs/quickstart/helm/values-server.yaml b/docs/quickstart/helm/values-server.yaml deleted file mode 100644 index 5d2df06e302..00000000000 --- a/docs/quickstart/helm/values-server.yaml +++ /dev/null @@ -1,40 +0,0 @@ -ingressClass: "nginx" -clusterIssuer: "dummy-value" -hostnames: - - "dummy-value" -replicas: 3 - -applicationConfig: - pulsar: - url: "pulsar://pulsar-broker.default.svc.cluster.local:6650" - redis: - masterName: "mymaster" - addrs: - - "redis-redis-ha-announce-0.default.svc.cluster.local:26379" - - "redis-redis-ha-announce-1.default.svc.cluster.local:26379" - - "redis-redis-ha-announce-2.default.svc.cluster.local:26379" - poolSize: 1000 - eventsRedis: - 
masterName: "mymaster" - addrs: - - "redis-redis-ha-announce-0.default.svc.cluster.local:26379" - - "redis-redis-ha-announce-1.default.svc.cluster.local:26379" - - "redis-redis-ha-announce-2.default.svc.cluster.local:26379" - poolSize: 1000 - auth: - basicAuth: - enableAuthentication: false - anonymousAuth: true - permissionGroupMapping: - submit_any_jobs: ["everyone"] - create_queue: ["everyone"] - delete_queue: ["everyone"] - cancel_any_jobs: ["everyone"] - reprioritize_any_jobs: ["everyone"] - watch_all_events: ["everyone"] - execute_jobs: ["everyone"] - -prometheus: - enabled: true - -nodePort: 30000 diff --git a/docs/quickstart/img/grafana-screenshot.png b/docs/quickstart/img/grafana-screenshot.png deleted file mode 100644 index 5e2a1beb503..00000000000 Binary files a/docs/quickstart/img/grafana-screenshot.png and /dev/null differ diff --git a/docs/quickstart/index.md b/docs/quickstart/index.md deleted file mode 100644 index 1b77479b092..00000000000 --- a/docs/quickstart/index.md +++ /dev/null @@ -1,129 +0,0 @@ -# Armada Quickstart - -The main purpose of this guide is to install a minimal local Armada deployment for testing and evaluation purposes. - -## Pre-requisites - -- Git -- Docker (Docker Desktop recommended for local development on Windows/OSX) -- Helm v3.5+ -- Kind v0.11.1+ -- Kubectl - -## Installation -This guide will install Armada on 3 local Kubernetes clusters; one server and two executor clusters. - -Clone [this](https://github.com/armadaproject/armada) repository: - -```bash -git clone https://github.com/armadaproject/armada.git -cd armada -``` - -All commands are intended to be run from the root of the repository. - -Armada is a resource intensive application due to the need to run multiple Kubernetes clusters - for a local installation you will need at least 4 CPU cores and 16GB of RAM available. 
- -### One-click Setup - -To install Armada and all its dependencies you can use this script: - -``` -docs/local/setup.sh -``` - -Once completed, wait for all pods to be running via `kubectl get pod` - -Likewise you can remove the Armada components from your system: - -``` -docs/local/destroy.sh -``` - -## Usage -Create queues, submit some jobs and monitor progress: - -### Queue Creation -```bash -./armadactl create queue queue-a --priority-factor 1 -./armadactl create queue queue-b --priority-factor 2 -``` -For queues created in this way, user and group owners of the queue have permissions to: -- submit jobs -- cancel jobs -- reprioritize jobs -- watch queue - -For more control, queues can be created via `armadactl create`, which allows for setting specific permission; see the following example. - -```bash -./armadactl create -f ./docs/quickstart/queue-a.yaml -./armadactl create -f ./docs/quickstart/queue-b.yaml -``` - - -### Job Submission -``` -./armadactl submit ./docs/quickstart/job-queue-a.yaml -./armadactl submit ./docs/quickstart/job-queue-b.yaml -``` - -Watch individual queues: - -```bash -./armadactl watch queue-a job-set-1 -``` -```bash -./armadactl watch queue-b job-set-1 -``` - -Log in to the Grafana dashboard at [http://localhost:30001](http://localhost:30001) using the default credentials of `admin` / `prom-operator`. -Navigate to the Armada Overview dashboard to get a view of jobs progressing through the system. 
- -Try submitting lots of jobs and see queues get built and processed: - -```bash -for i in {1..50} -do - ./armadactl submit ./docs/quickstart/job-queue-a.yaml - ./armadactl submit ./docs/quickstart/job-queue-b.yaml -done -``` - -## Example output: - -CLI: - -```bash -$ ./armadactl watch queue-a job-set-1 -Watching job set job-set-1 -Nov 4 11:43:36 | Queued: 0, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobSubmittedEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:36 | Queued: 1, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobQueuedEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:36 | Queued: 1, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobSubmittedEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:36 | Queued: 2, Leased: 0, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobQueuedEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:38 | Queued: 1, Leased: 1, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobLeasedEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:38 | Queued: 0, Leased: 2, Pending: 0, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobLeasedEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:38 | Queued: 0, Leased: 1, Pending: 1, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobPendingEvent, job id: 01drv3mey2mzmayf50631tzp9m -Nov 4 11:43:38 | Queued: 0, Leased: 0, Pending: 2, Running: 0, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobPendingEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:41 | Queued: 0, Leased: 0, Pending: 1, Running: 1, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobRunningEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:43:41 | Queued: 0, Leased: 0, Pending: 0, Running: 2, Succeeded: 0, Failed: 0, Cancelled: 0 | event: *api.JobRunningEvent, job id: 
01drv3mey2mzmayf50631tzp9m -Nov 4 11:44:17 | Queued: 0, Leased: 0, Pending: 0, Running: 1, Succeeded: 1, Failed: 0, Cancelled: 0 | event: *api.JobSucceededEvent, job id: 01drv3mf7b6fd1rraeq1f554fn -Nov 4 11:44:26 | Queued: 0, Leased: 0, Pending: 0, Running: 0, Succeeded: 2, Failed: 0, Cancelled: 0 | event: *api.JobSucceededEvent, job id: 01drv3mey2mzmayf50631tzp9m -``` - - -### Grafana Configuration - -Run the following commands to setup Grafana in your environment: - -```bash -curl -X POST -i http://admin:prom-operator@localhost:30001/api/datasources -H "Content-Type: application/json" -d '{"name":"cluster-0","type":"prometheus","url":"http://'$EXECUTOR_0_IP':30001","access":"proxy","basicAuth":false}' -curl -X POST -i http://admin:prom-operator@localhost:30001/api/datasources -H "Content-Type: application/json" -d '{"name":"cluster-1","type":"prometheus","url":"http://'$EXECUTOR_1_IP':30001","access":"proxy","basicAuth":false}' -curl -X POST -i http://admin:prom-operator@localhost:30001/api/dashboards/import --data-binary @./docs/quickstart/grafana-armada-dashboard.json -H "Content-Type: application/json" -``` - -Grafana: - -![Armada Grafana dashboard](./img/grafana-screenshot.png "Armada Grafana dashboard") - -Note that the jobs in this demo simply run the `sleep` command so do not consume many resources. 
diff --git a/docs/quickstart/job-queue-a-preemptive.yaml b/docs/quickstart/job-queue-a-preemptive.yaml deleted file mode 100644 index 137ce0f5bca..00000000000 --- a/docs/quickstart/job-queue-a-preemptive.yaml +++ /dev/null @@ -1,23 +0,0 @@ -queue: queue-a -jobSetId: job-set-1 -jobs: - - priority: 1 - podSpec: - priorityClassName: armada-example-priority-class - terminationGracePeriodSeconds: 0 - restartPolicy: Never - containers: - - name: sleeper - image: alpine:latest - command: - - sh - args: - - -c - - sleep $(( (RANDOM % 60) + 30 )) - resources: - limits: - memory: 100Mi - cpu: 100m - requests: - memory: 100Mi - cpu: 100m diff --git a/docs/quickstart/job-queue-a.yaml b/docs/quickstart/job-queue-a.yaml deleted file mode 100644 index 2ce3e10b53d..00000000000 --- a/docs/quickstart/job-queue-a.yaml +++ /dev/null @@ -1,23 +0,0 @@ -queue: queue-a -jobSetId: job-set-1 -jobs: - - namespace: default - priority: 0 - podSpec: - terminationGracePeriodSeconds: 0 - restartPolicy: Never - containers: - - name: sleeper - image: alpine:latest - command: - - sh - args: - - -c - - sleep $(( (RANDOM % 60) + 10 )) - resources: - limits: - memory: 128Mi - cpu: 0.2 - requests: - memory: 128Mi - cpu: 0.2 diff --git a/docs/quickstart/job-queue-b.yaml b/docs/quickstart/job-queue-b.yaml deleted file mode 100644 index a6de6e50d7c..00000000000 --- a/docs/quickstart/job-queue-b.yaml +++ /dev/null @@ -1,23 +0,0 @@ -queue: queue-b -jobSetId: job-set-1 -jobs: - - namespace: default - priority: 0 - podSpec: - terminationGracePeriodSeconds: 0 - restartPolicy: Never - containers: - - name: sleeper - image: alpine:latest - command: - - sh - args: - - -c - - sleep $(( (RANDOM % 60) + 10 )) - resources: - limits: - memory: 128Mi - cpu: 0.2 - requests: - memory: 128Mi - cpu: 0.2 diff --git a/docs/quickstart/kind/kind-config-executor.yaml b/docs/quickstart/kind/kind-config-executor.yaml deleted file mode 100644 index 1ae052c93c5..00000000000 --- a/docs/quickstart/kind/kind-config-executor.yaml +++ 
/dev/null @@ -1,7 +0,0 @@ -kind: Cluster -apiVersion: kind.x-k8s.io/v1alpha4 -nodes: - - role: control-plane - image: "kindest/node:v1.21.10" - - role: worker - image: "kindest/node:v1.21.10" diff --git a/docs/quickstart/kind/kind-config-server.yaml b/docs/quickstart/kind/kind-config-server.yaml deleted file mode 100644 index 0456354e678..00000000000 --- a/docs/quickstart/kind/kind-config-server.yaml +++ /dev/null @@ -1,12 +0,0 @@ -kind: Cluster -apiVersion: kind.x-k8s.io/v1alpha4 -nodes: - - role: control-plane - image: "kindest/node:v1.21.10" - - role: worker - image: "kindest/node:v1.21.10" - extraPortMappings: - - containerPort: 30000 - hostPort: 50051 - - containerPort: 30001 - hostPort: 30001 diff --git a/docs/quickstart/priority-class-example.yaml b/docs/quickstart/priority-class-example.yaml deleted file mode 100644 index 2faf77ae307..00000000000 --- a/docs/quickstart/priority-class-example.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: armada-example-priority-class -value: 10 -preemptionPolicy: PreemptLowerPriority -globalDefault: false -description: "Example priority class for preemptive Armada jobs." 
diff --git a/docs/quickstart/queue-a.yaml b/docs/quickstart/queue-a.yaml deleted file mode 100644 index 8c45005c492..00000000000 --- a/docs/quickstart/queue-a.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: armadaproject.io/v1beta1 -kind: Queue -name: queue-a -permissions: -- subjects: - - name: group2 - kind: Group - verbs: - - cancel - - reprioritize - - watch -priorityFactor: 3.0 -resourceLimits: - cpu: 1.0 - memory: 1.0 \ No newline at end of file diff --git a/docs/quickstart/queue-b.yaml b/docs/quickstart/queue-b.yaml deleted file mode 100644 index c405d9f6596..00000000000 --- a/docs/quickstart/queue-b.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: armadaproject.io/v1beta1 -kind: Queue -name: queue-b -permissions: -- subjects: - - name: group1 - kind: Group - - name: user1 - kind: User - verbs: - - submit - - cancel - - reprioritize - - watch -- subjects: - - name: group2 - kind: Group - verbs: - - cancel - - reprioritize - - watch -priorityFactor: 1.0 -resourceLimits: - cpu: 0.2 - memory: 0.1 \ No newline at end of file diff --git a/example/kind-config.yaml b/example/kind-config.yaml deleted file mode 100644 index cebee6d5b98..00000000000 --- a/example/kind-config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -kind: Cluster -apiVersion: kind.x-k8s.io/v1alpha4 -nodes: - - role: control-plane - - role: worker diff --git a/plugins/armadactl.yml b/plugins/armadactl.yml index 61dc90492a3..6e1cbfd85f4 100644 --- a/plugins/armadactl.yml +++ b/plugins/armadactl.yml @@ -5,12 +5,12 @@ metadata: spec: version: v0.3.88 homepage: https://github.com/armadaproject/armada - shortDescription: Command line utility to submit many jobs to armada + shortDescription: Command line utility used for interacting with Armada API description: | armadactl is a command-line tool used for managing jobs in the Armada workload orchestration system. It provides functionality for creating, updating, and deleting jobs, as well as monitoring job status and resource usage. 
caveats: | - Before using the Armada CLI, make sure you have working armada enviornment + Before using the Armada CLI, make sure you have a working armada environment or a armadactl.yaml file that points to a valid armada cluster. platforms: - selector: @@ -33,4 +33,4 @@ spec: arch: amd64 uri: https://github.com/armadaproject/armada/releases/download/v0.3.8655/armadactl_0.3.8655_windows_amd64.zip sha256: 27774e39b8a29603671c21ed9487fbd073eb408535afe5de5f336e84dc13998b - bin: armadactl.exe \ No newline at end of file + bin: armadactl.exe diff --git a/scripts/get-armadactl.sh b/scripts/get-armadactl.sh new file mode 100755 index 00000000000..1c4f2a74b0d --- /dev/null +++ b/scripts/get-armadactl.sh @@ -0,0 +1,97 @@ +#!/bin/sh + +set -e + +get_arch() { + case $(uname -m) in + "x86_64" | "amd64" ) echo "amd64" ;; + "i386" | "i486" | "i586") echo "386" ;; + "aarch64" | "arm64" | "arm") echo "arm64" ;; + "mips64el") echo "mips64el" ;; + "mips64") echo "mips64" ;; + "mips") echo "mips" ;; + *) echo "unknown" ;; + esac +} + +get_os() { + uname -s | awk '{print tolower($0)}' +} + +get_latest_release() { + curl --silent "https://api.github.com/repos/armadaproject/armada/releases/latest" | \ + grep '"tag_name":' | \ + sed -E 's/.*"([^"]+)".*/\1/' +} + +owner="armadaproject" +repo="armada" +githubUrl="https://github.com" +githubApiUrl="https://api.github.com" +version=$(get_latest_release) +exe_name="armadactl" + +# parse flags +for i in "$@"; do + case $i in + -v=*|--version=*) + version="${i#*=}" + exe_name="armadactl_${version#v}" + shift # past argument=value + ;; + *) + # unknown option + ;; + esac +done + +downloadFolder="${TMPDIR:-/tmp}" +mkdir -p ${downloadFolder} # make sure download folder exists +os=$(get_os) +arch=$(get_arch) +if [ "$os" = "darwin" ]; then + arch="all" +fi +file_name="${exe_name}_${version#v}_${os}_${arch}.tar.gz" # the file name to download +downloaded_file="${downloadFolder}/${file_name}" # the local path of the downloaded file
+executable_folder="/usr/local/bin" # Eventually, the executable file will be placed here + +# if version is empty +if [ -z "$version" ]; then + asset_path=$( + curl -L \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + ${githubApiUrl}/repos/${owner}/${repo}/releases | + grep -o "/${owner}/${repo}/releases/download/.*/${file_name}" | + head -n 1 + ) + if [ -z "$asset_path" ]; then + echo "ERROR: unable to find a release asset called ${file_name}" + exit 1 + fi + asset_uri="${githubUrl}${asset_path}" +else + asset_uri="${githubUrl}/${owner}/${repo}/releases/download/${version}/${file_name}" +fi + +echo "[1/3] Download ${asset_uri} to ${downloadFolder}" +rm -f ${downloaded_file} +curl --fail --location --output "${downloaded_file}" "${asset_uri}" + +echo "[2/3] Install ${exe_name} to the ${executable_folder}" +tar -xz -f ${downloaded_file} -C ${executable_folder} +exe=${executable_folder}/${exe_name} +chmod +x ${exe} + +echo "[3/3] Set environment variables" +echo "${exe_name} was installed successfully to ${exe}" +if command -v $exe_name >/dev/null; then + echo "Run '$exe_name --help' to get started" +else + echo "Manually add the directory to your \$HOME/.bash_profile (or similar)" + echo " export PATH=${executable_folder}:\$PATH" + echo "Run '$exe_name --help' to get started" +fi + +exit 0