feat!: Flux localhost bootstrap from 1P, add Sinon cluster
- installs Flux in hostNetwork mode bound to localhost on new clusters

- rework Taskfiles for the new bootstrap flow, including loading secrets directly from 1Password with no SOPS for secret zero (see the sketch after this list)

- use 1Password for both the talsecret and talenv inputs to talhelper genconfig

- remove SOPS secrets

- add Sinon cluster, used as a NAS

- clean up the ExternalSecret and 1Password Connect Flux Kustomizations (ks) for a smoother bootstrap

- try out 1Password Connect as an extraContainer in the external-secrets Deployment to avoid secrets going over the network

- general cleanup
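
As a rough sketch of how the reworked flow might be driven end to end (the cluster/vault name "sinon" and the node IP are placeholders, and the talos:/bootstrap: prefixes assume the root Taskfile includes these Taskfiles under those namespaces):

# hedged sketch, not the canonical procedure; secret zero comes straight from 1Password, no SOPS
eval $(op signin)                            # interactive 1Password CLI session
task talos:genconfig C=sinon                 # talhelper genconfig with talsecret/talenv injected via `op run`
task talos:bootstrap C=sinon IP=10.0.0.10    # placeholder node address
task bootstrap:bs C=sinon                    # localhost-bound Flux install, 1P Connect credentials, Flux secrets
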
JJGadgets committed May 21, 2024
1 parent 54f4117 commit 5ba9c8a
Showing 56 changed files with 1,818 additions and 1,087 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -16,5 +16,11 @@ clusterconfig/
.agekey
Admins.txt
GameUserSettings.ini
!ostree/*-jj.repo
ostree/*.repo
*.sops.*.tmp
*.code-workspace
*venv*
*venv*/**
.decrypted~*
.ignore~*
26 changes: 14 additions & 12 deletions .rtx.toml → .mise.toml
@@ -1,23 +1,25 @@
[env]
KUBECTL_INTERACTIVE_DELETE = "true"
KUBECTL_COMMAND_HEADERS = "true"
#SSH_AUTH_SOCK = $(gpgconf --list-dirs agent-ssh-socket)

[tools]
# kubectl = [""]
# krew = [""]
# kubectx = [""]
kubectl = ["1.29.2"]
krew = ["0.4.4"]
kubectx = ["0.9.5"]
fzf = ["0.52.1"] # used by kubectx interactive mode
kustomize = ["5.3.0"]
# kubecolor = [""]
flux2 = ["2.2.3"]
talosctl = ["1.5.4", "1.3.6"]
flux2 = "2.2.3"
talosctl = ["1.6.7", "1.5.4", "1.3.6"]
talhelper = ["1.16.2"]
cilium-cli = ["0.15.14"]
1password-cli = ["2.24.0"]
restic = ["0.16.4"]
k9s = ["0.32.4"]

soft-serve = ["0.7.4"]
#pulumi = ["3.95.0"]
python = ["3.11"]

[env]
KUBECTL_INTERACTIVE_DELETE = "true"
KUBECTL_COMMAND_HEADERS = "true"
#SSH_AUTH_SOCK = $(gpgconf --list-dirs agent-ssh-socket)
_.python.venv = { path = ".venv", create = true } # create the venv if it doesn't exist

[plugins]
talhelper = "https://github.com/bjw-s/asdf-talhelper.git"
Empty file.
72 changes: 72 additions & 0 deletions .taskfiles/bootstrap/Taskfile.dist.yaml
@@ -0,0 +1,72 @@
---
# Tasks related to multi-cluster or cluster-level management, e.g. bootstrap
version: "3"

includes:
flux:
internal: true
taskfile: ../flux

vars:
C: '{{.C | default "biohazard"}}'
# APPLY: '{{.APPLY | default "0"}}'

tasks:
1p-vars-env-yq:
vars:
C: &c '{{ or .C (fail "Missing C environment variable for cluster!") }}'
OUT: &out '{{ .OUT | default "yaml" }}'
cmds:
- op item get --vault {{.C}} ".{{.C}}-vars" --format=json | yq --input-format json --output-format {{ .OUT | default "yaml" }} '.fields | with(.[]; (.label | key) = "key") | (.[] | select(has("value") | not) | .value) |= "" | from_entries'

1p-vars-env-shell:
aliases: [1penv, openv]
vars:
C: *c
# OUT: *out
PRE_ARGS: '{{.PRE_ARGS}}'
POST_ARGS: '{{.POST_ARGS}}'
cmds:
- |
{{.PRE_ARGS}}op item get --vault {{.C}} ".{{.C}}-vars" --format=json | yq --input-format json '.fields | with(.[]; (.label | key) = "key") | (.[] | select(has("value") | not) | .value) |= "" | map(.key + "=" + (.value | @sh)) | .[]'{{.POST_ARGS}} {{.CLI_ARGS}}
1p-vars-env-run:
dir: '{{.USER_WORKING_DIR}}'
vars:
C: &c '{{ or .C (fail "Missing C environment variable for cluster!") }}'
# OUT: *out
cmds:
- export $(task --taskfile /{{.ROOT_DIR}}/Taskfile.dist.yaml 1p-vars-env-shell C={{.C}}); {{.CLI_ARGS}}

k8s-1p-connect-load-credentials:
vars:
C: *c
cmds:
- kubectl get namespace external-secrets || kubectl create namespace external-secrets
- kubectl get namespace onepassword-connect || kubectl create namespace onepassword-connect
# - kubectl delete secret -n onepassword-connect onepassword-connect-secrets || true
- op read "op://{{.C}}/$(op item get --vault {{.C}} '{{.C}} Credentials File' --format=json | yq '.id')/1password-credentials.json" | kubectl create secret -n external-secrets generic onepassword-connect-secrets --from-file=1password-credentials.json=/dev/stdin
- op read "op://{{.C}}/$(op item get --vault {{.C}} '{{.C}} Credentials File' --format=json | yq '.id')/1password-credentials.json" | kubectl create secret -n onepassword-connect generic onepassword-connect-secrets --from-file=1password-credentials.json=/dev/stdin
# - kubectl rollout restart -n onepassword-connect deploy/onepassword-connect || true

bootstrap:
aliases: [bs]
desc: Bootstrap Kubernetes to a GitOps-managed state after OS install.
vars:
C: *c
cmds:
- |
kubectx || kubectl config current-context; read -p "BOOTSTRAP
Press ENTER to confirm the Kubernetes context to bootstrap, or Control+C to exit.
> "; read -p "RECONFIRM: Press ENTER again to really confirm.
> "
- task: flux:install
- task: k8s-1p-connect-load-credentials
vars:
C: *c
- task: 1p-vars-env-shell
vars:
C: *c
POST_ARGS: " | kubectl create secret generic -n flux-system sinon-vars --from-env-file=/dev/stdin"
- kubectl create secret generic -n flux-system sinon-secrets # currently nothing hopefully maybe?
- op read "op://{{.C}}/Flux/agekey" | kubectl create secret generic -n flux-system agekey --from-file=age.agekey=/dev/stdin
15 changes: 14 additions & 1 deletion .taskfiles/flux/Taskfile.dist.yaml
@@ -6,8 +6,21 @@ version: "3"

tasks:
install:
vars:
FLUXDIR:
sh: mktemp -d
# renovate: datasource=docker depName=ghcr.io/fluxcd/flux-manifests
FLUX_DEFAULT_VERSION: "v2.2.3"
FLUXVER: '{{ .FLUXVER | default .FLUX_DEFAULT_VERSION }}'
C: '{{ .C | default "biohazard" }}'
cmds:
- kubectl apply --server-side --kustomize /{{.ROOT_DIR}}/kube/bootstrap/flux/
#- defer: rm {{.FLUXDIR}}/*.yaml && rmdir {{.FLUXDIR}}
- flux pull artifact oci://ghcr.io/fluxcd/flux-manifests:{{.FLUXVER}} --output {{.FLUXDIR}}
- |
kubectx || kubectl config current-context; read -p "Press ENTER to confirm the Kubernetes context to install Flux to, or Control+C to exit.
> "
- flux build kustomization zzz-flux --path {{.FLUXDIR}} --kustomization-file /{{.ROOT_DIR}}/kube/bootstrap/flux/flux-install-localhost.yaml --dry-run | kubectl delete -f -
- flux build kustomization zzz-flux --path {{.FLUXDIR}} --kustomization-file /{{.ROOT_DIR}}/kube/bootstrap/flux/flux-install-localhost.yaml --dry-run | kubectl apply --server-side -f -

get-all-watch:
aliases: [ga, fga, getall]
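
Shown standalone, the install task above roughly amounts to the following (the version and paths are illustrative; flux-install-localhost.yaml is the repo's own kustomization file referenced by the task):

FLUXVER=v2.2.3        # example; the task defaults to FLUX_DEFAULT_VERSION
FLUXDIR=$(mktemp -d)
flux pull artifact "oci://ghcr.io/fluxcd/flux-manifests:${FLUXVER}" --output "${FLUXDIR}"
flux build kustomization zzz-flux --path "${FLUXDIR}" \
  --kustomization-file ./kube/bootstrap/flux/flux-install-localhost.yaml --dry-run \
  | kubectl apply --server-side -f -
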
26 changes: 21 additions & 5 deletions .taskfiles/k8s/Taskfile.dist.yaml
@@ -25,12 +25,13 @@ tasks:
NAME: &name-fail '{{ or .NAME (fail "Missing `NAME` environment variable!") }}'
NS: *ns-fail
cmds:
- until [[ $(kubectl -n {{.NS}} get pod {{.NAME}} -o jsonpath='{.items[*].status.phase}') == "Pending" ]]; do sleep 1; done
- |
until [[ $(kubectl -n {{.NS}} get pod {{.NAME}} -o jsonpath='{.status.phase}') == "Pending" ]]; do sleep 1; done
wait-pod-running:
aliases: [waitr]
internal: true
desc: Wait for a job's pod to change its status to running
desc: "Wait for a job's pod to change its status to running"
vars:
NAME: *name-fail
NS: *ns-fail
@@ -44,7 +45,8 @@ tasks:
NAME: *name-fail
NS: *ns-fail
cmds:
- until kubectl wait pod -n {{.NS}} {{.NAME}} --for-jsonpath='{.items[*].status.phase}'=Running --timeout=-; do sleep 1; done
- |
until kubectl wait pod -n {{.NS}} {{.NAME}} --for=jsonpath='{.status.phase}'=Running --timeout=-1s; do sleep 1; done
wait-pod-ready:
internal: true
@@ -134,8 +136,8 @@ tasks:
shortnames:
desc: List all installed CRDs and their short names.
cmds:
- |-
kubectl get crds -o jsonpath='{range .items[*]}{.spec.names.kind}: {.spec.names.shortNames}{"\n"}{end}'
- |
kubectl get crds -o jsonpath='{range .items[*]}{.spec.names.kind}: {.spec.names.shortNames}{"\n"}{end}'
clear-old-pods:
aliases: [cop]
@@ -147,6 +149,20 @@
kubectl delete pod -A --field-selector=status.phase==Succeeded || true;
done
delete-stuck-pvc:
aliases: [delpvc]
desc: Delete a PVC which is stuck, e.g. a local-path PVC whose node has been wiped and reset.
vars:
NS: '{{ or .NS (fail "Missing `NS` environment variable!") }}'
PVC: '{{ or .PVC (fail "Missing `PVC` environment variable!") }}'
cmds:
- |
kubectl delete pvc -n {{.NS}} {{.PVC}} --wait=false
- |
kubectl patch pvc -n {{.NS}} {{.PVC}} --type='json' -p '[{"op": "remove", "path": "/metadata/finalizers"}]'
- |
until kubectl wait --for=delete pvc -n {{.NS}} {{.PVC}} --timeout=3600s; do sleep 1; done
iperf2:
desc: Start an iperf2 server on one node and an iperf2 client on another node to benchmark network performance.
dir: "/{{.ROOT_DIR}}/.taskfiles/k8s/template/iperf2"
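
A hedged usage example for the new delete-stuck-pvc task (the namespace and PVC names are placeholders, and the k8s: prefix assumes the root Taskfile includes this file under that namespace):

task k8s:delete-stuck-pvc NS=media PVC=config-someapp-0
# or via the alias:
task k8s:delpvc NS=media PVC=config-someapp-0
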
31 changes: 30 additions & 1 deletion .taskfiles/pg/Taskfile.dist.yaml
@@ -6,7 +6,7 @@ version: "3"
# NS: '{{.NS | default "pg"}}'

tasks:
rw:
cnpg-rw:
desc: Get the current CloudNativePG master (writable) instance to run a command against.
dir: "{{.USER_WORKING_DIR}}"
vars:
@@ -22,6 +22,25 @@
cmds:
- kubectl get clusters.postgresql.cnpg.io -n {{.NS}} {{.PG}} -o yaml | yq '.status.currentPrimary' | xargs -o -I% kubectl -n {{.NS}} {{.CLI_ARGS}}

crunchy-master:
desc: Get the current Crunchy PGO master (writable) instance to run a command against.
dir: "{{.USER_WORKING_DIR}}"
vars:
APP: "{{.APP}}"
PG: &pg-default
sh: |-
[[ -n "{{.PG}}" ]] && ( echo -n "{{.PG}}" && exit 0 ) || ( [[ -n "{{.APP}}" ]] && echo -n "pg-{{.APP}}" || echo -n "pg-default" )
NS: &ns-default
sh: |-
[[ -n "{{.NS}}" ]] && echo -n "{{.NS}}" || ( [[ -n "{{.APP}}" ]] && echo -n "{{.APP}}" || echo -n "pg" )
MASTER:
sh: |-
kubectl get pod -n {{.NS}} -l postgres-operator.crunchydata.com/cluster={{.PG}},postgres-operator.crunchydata.com/role=master -o name
# PG: '{{ or .PG (fail "Missing `PG` environment variable!") }}'
# NS: &ns-default '{{.NS | default "pg"}}'
cmds:
- kubectl exec -it -n {{.NS}} {{.MASTER}} --container database -- {{.CLI_ARGS}}

adminer:
desc: Use the kubectl netshoot krew plugin to deploy Adminer as a sidecar to the current CNPG primary pod.
dir: "{{.USER_WORKING_DIR}}"
@@ -98,3 +117,13 @@
cmds:
- kubectl exec -it --container database -n {{.NS}} {{.CRUNCHY_PRIMARY}} -- /bin/bash -c 'rm -rf {{.CP_DIR}}/dump.psql && pg_dump --dbname {{.DBNAME}} {{.ARGS}} --file {{.CP_DIR}}/dump.psql'
- kubectl cp --container database {{.NS}}/{{.CRUNCHY_PRIMARY}}:{{.CP_DIR}}/dump.psql {{.DUMP}}

crunchy-expire:
desc: Expire unused backups on the dedicated pgBackRest repo host pod of a CrunchyData Postgres cluster.
dir: "{{.USER_WORKING_DIR}}"
vars:
APP: "{{.APP}}"
PG: *pg-default
NS: *ns-default
cmds:
- kubectl get pods -n {{.NS}} -l postgres-operator.crunchydata.com/pgbackrest-dedicated=,postgres-operator.crunchydata.com/cluster={{.PG}} -o name | xargs -oI% kubectl exec -it -n {{.NS}} % -c pgbackrest -- pgbackrest expire --stanza=db --repo=1 --repo1-retention-full=1 --repo1-retention-diff=1
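
Hedged usage examples for the new Crunchy tasks (the app name is a placeholder; per the defaults above, APP=someapp resolves to cluster pg-someapp in namespace someapp, and anything after -- is passed through as CLI_ARGS):

task pg:crunchy-master APP=someapp -- psql   # exec into the current master's database container
task pg:crunchy-expire APP=someapp           # expire old pgBackRest backups on the repo host pod
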
40 changes: 29 additions & 11 deletions .taskfiles/talos/Taskfile.dist.yaml
@@ -8,6 +8,9 @@ includes:
cluster:
internal: true
taskfile: ../cluster
# bootstrap:
# internal: true
# taskfile: ../bootstrap

vars:
C: '{{.C | default "biohazard"}}'
@@ -18,12 +21,27 @@ tasks:
dir: "/{{.ROOT_DIR}}/kube/clusters/{{.C}}/talos"
vars:
C: '{{ or .C (fail "Missing C environment variable for cluster!") }}'
# dotenv:
# - "/{{.ROOT_DIR}}/.taskfiles/talos/talhelper-secrets-1p.env"
env:
cluster_id: "op://{{.C}}/.{{.C}}-talos/Secrets/cluster_id"
cluster_secret: "op://{{.C}}/.{{.C}}-talos/Secrets/cluster_secret"
secrets_bootstraptoken: "op://{{.C}}/.{{.C}}-talos/Secrets/secrets_bootstraptoken"
secrets_secretboxencryptionsecret: "op://{{.C}}/.{{.C}}-talos/Secrets/secrets_secretboxencryptionsecret"
trustdinfo_token: "op://{{.C}}/.{{.C}}-talos/Secrets/trustdinfo_token"
certs_etcd_crt: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_etcd_crt"
certs_etcd_key: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_etcd_key"
certs_k8s_crt: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_k8s_crt"
certs_k8s_key: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_k8s_key"
certs_k8saggregator_crt: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_k8saggregator_crt"
certs_k8saggregator_key: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_k8saggregator_key"
certs_k8sserviceaccount_key: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_k8sserviceaccount_key"
certs_os_crt: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_os_crt"
certs_os_key: "op://{{.C}}/.{{.C}}-talos/Secrets/certs_os_key"
cmds:
- pwd
- sops --decrypt {{.ROOT_DIR}}/kube/clusters/{{.C}}/config/vars.sops.env | sops --config {{.ROOT_DIR}}/.sops-stdin.yaml --encrypt --output-type yaml --output ./talenv.sops.yaml /dev/stdin
- talhelper genconfig -e ./talenv.sops.yaml
- defer: rm -rf ./talenv.sops.yaml

- op user get --me # check signin status, fail if not signed in
- op run -- task --taskfile /{{.ROOT_DIR}}/.taskfiles/bootstrap/Taskfile.dist.yaml 1p-vars-env-run C={{.C}} -- talhelper genconfig

bootstrap:
aliases: [bs]
@@ -42,13 +60,13 @@
- until talosctl kubeconfig --talosconfig /{{.ROOT_DIR}}/kube/clusters/{{.C}}/talos/clusterconfig/talosconfig --endpoints {{.IP}} --nodes {{.IP}} --context {{.C}} ; do sleep 1; done
- |
sed -i 's/current-context: admin@.*/current-context: admin@{{.C}}/g' ~/.kube/config || true
- task: cilium-bootstrap-apply
vars:
C: '{{.C | default "biohazard"}}'
- task: cluster:cluster-init-config
vars:
C: '{{.C | default "biohazard"}}'
APPLY: "1"
# - task: cilium-bootstrap-apply
# vars:
# C: '{{.C | default "biohazard"}}'
# - task: cluster:cluster-init-config
# vars:
# C: '{{.C | default "biohazard"}}'
# APPLY: "1"

reboot:
desc: Reboot nodes based on Kubernetes InternalIP. Assumes only 1 IP in InternalIP, and that the IP known to Kubernetes is the same one used by `talosctl`.
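
The env block in the genconfig task above stores 1Password secret references rather than values; `op run` resolves any environment variable holding an op:// reference for the child process only. A minimal standalone illustration (the vault and item names are examples):

export cluster_id="op://sinon/.sinon-talos/Secrets/cluster_id"
op run -- talhelper genconfig   # talhelper sees the resolved secret; the shell keeps only the reference
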