From 064401e4b03e18b5c165bcc905b28764c9495ad9 Mon Sep 17 00:00:00 2001 From: Alex Robinson Date: Mon, 24 Oct 2016 10:01:08 -0400 Subject: [PATCH] k8s: Fix potential edge case where a second cluster could get started * Add an init container that checks for whether any other peers exist * Re-enable the tolerate-unready-endpoints option. It's bprashanth's recommendation, and makes the init container less likely to miss anything. * Switch from joining `cockroachdb` to joining `cockroachdb-public`. This is needed due to re-enabling `tolerate-unready-endpoints`. I wish we could just directly re-use the peer-finder container without having to wrap it, but I already burned too much time trying to get it to accept non-trivial commands in its `--on-start` parameter. Almost everything I tried would just get treated to a "No such file or directory" error. --- cloud/kubernetes/README.md | 16 ++++- cloud/kubernetes/cockroachdb-petset.yaml | 85 ++++++++++++++++++------ cloud/kubernetes/init/Dockerfile | 6 ++ cloud/kubernetes/init/README.md | 30 +++++++++ cloud/kubernetes/init/on-start.sh | 8 +++ 5 files changed, 121 insertions(+), 24 deletions(-) create mode 100644 cloud/kubernetes/init/Dockerfile create mode 100644 cloud/kubernetes/init/README.md create mode 100755 cloud/kubernetes/init/on-start.sh diff --git a/cloud/kubernetes/README.md b/cloud/kubernetes/README.md index 48cf6d2badcd..573cedb2716a 100644 --- a/cloud/kubernetes/README.md +++ b/cloud/kubernetes/README.md @@ -98,8 +98,7 @@ Start up a client pod and open up an interactive, (mostly) Postgres-flavor SQL shell using: ```console -$ kubectl run -it cockroach-client --image=cockroachdb/cockroach --restart=Never --command -- bash -root@cockroach-client # ./cockroach sql --host cockroachdb-public +$ kubectl run -it --rm cockroach-client --image=cockroachdb/cockroach --restart=Never --command -- ./cockroach sql --host cockroachdb-public ``` You can see example SQL statements for inserting and querying data in the @@ 
-107,6 +106,19 @@ included [demo script](demo.sh), but can use almost any Postgres-style SQL commands. Some more basic examples can be found within [CockroachDB's documentation](https://www.cockroachlabs.com/docs/learn-cockroachdb-sql.html). +## Accessing the admin UI + +If you want to see information about how the cluster is doing, you can try +pulling up the CockroachDB admin UI by port-forwarding from your local machine +to one of the pods: + +```shell +kubectl port-forward cockroachdb-0 8080 +``` + +Once you’ve done that, you should be able to access the admin UI by visiting +http://localhost:8080/ in your web browser. + ## Simulating failures When all (or enough) nodes are up, simulate a failure like this: diff --git a/cloud/kubernetes/cockroachdb-petset.yaml b/cloud/kubernetes/cockroachdb-petset.yaml index cf3c285966c9..c7d5bf4fc536 100644 --- a/cloud/kubernetes/cockroachdb-petset.yaml +++ b/cloud/kubernetes/cockroachdb-petset.yaml @@ -23,17 +23,25 @@ spec: apiVersion: v1 kind: Service metadata: + # This service only exists to create DNS entries for each pet in the petset + # such that they can resolve each other's IP addresses. It does not create a + # load-balanced ClusterIP and should not be used directly by clients in most + # circumstances. + name: cockroachdb + labels: + app: cockroachdb annotations: + # This is needed to make the peer-finder work properly and to help avoid + # edge cases where instance 0 comes up after losing its data and needs to + # decide whether it should create a new cluster or try to join an existing + # one. If it creates a new cluster when it should have joined an existing + # one, we'd end up with two separate clusters listening at the same service + # endpoint, which would be very bad. + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" # Enable automatic monitoring of all instances when Prometheus is running in the cluster. 
prometheus.io/scrape: "true" prometheus.io/path: "_status/vars" prometheus.io/port: "8080" - # This service only exists to create DNS entries for each pet in the petset such that they can resolve - # each other's IP addresses. It does not create a load-balanced ClusterIP and should not be used - # directly by clients in most circumstances. - name: cockroachdb - labels: - app: cockroachdb spec: ports: - port: 26257 @@ -59,6 +67,43 @@ spec: app: cockroachdb annotations: pod.alpha.kubernetes.io/initialized: "true" + # Init containers are run only once in the lifetime of a pod, before + # it's started up for the first time. It has to exit successfully + # before the pod's main containers are allowed to start. + # This particular init container does a DNS lookup for other pods in + # the petset to help determine whether or not a cluster already exists. + # If any other pets exist, it creates a file in the cockroach-data + # directory to pass that information along to the primary container that + # has to decide what command-line flags to use when starting CockroachDB. + # This only matters when a pod's persistent volume is empty - if it has + # data from a previous execution, that data will always be used. + pod.alpha.kubernetes.io/init-containers: '[ + { + "name": "bootstrap", + "image": "cockroachdb/cockroach-k8s-init:0.1", + "args": [ + "-on-start=/on-start.sh", + "-service=cockroachdb" + ], + "env": [ + { + "name": "POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + } + ], + "volumeMounts": [ + { + "name": "datadir", + "mountPath": "/cockroach/cockroach-data" + } + ] + } + ]' spec: containers: - name: cockroachdb @@ -94,24 +139,20 @@ spec: # The use of qualified `hostname -f` is crucial: # Other nodes aren't able to look up the unqualified hostname. 
CRARGS=("start" "--logtostderr" "--insecure" "--host" "$(hostname -f)" "--http-host" "0.0.0.0") - # TODO(tschottdorf): really want to use an init container to do - # the bootstrapping. The idea is that the container would know - # whether it's on the first node and could check whether there's - # already a data directory. If not, it would bootstrap the cluster. - # We will need some version of `cockroach init` back for this to - # work. For now, just do the same in a shell snippet. - # Of course this isn't without danger - if node0 loses its data, - # upon restarting it will simply bootstrap a new cluster and smack - # it into our existing cluster. - # There are likely ways out. For example, the init container could - # query the kubernetes API and see whether any other nodes are - # around, etc. Or, of course, the admin can pre-seed the lost - # volume somehow (and in that case we should provide a better way, - # for example a marker file). + # We only want to initialize a new cluster (by omitting the join flag) + # if we're sure that we're the first node (i.e. index 0) and that + # there aren't any other nodes running as part of the cluster that + # this is supposed to be a part of (which indicates that a cluster + # already exists and we should make sure not to create a new one). + # It's fine to run without --join on a restart if there aren't any + # other nodes. if [ ! "$(hostname)" == "cockroachdb-0" ] || \ - [ -e "/cockroach/cockroach-data/COCKROACHDB_VERSION" ] + [ -e "/cockroach/cockroach-data/cluster_exists_marker" ] then - CRARGS+=("--join" "cockroachdb") + # We don't join cockroachdb in order to avoid a node attempting + # to join itself, which currently doesn't work + # (https://github.com/cockroachdb/cockroach/issues/9625). 
+ CRARGS+=("--join" "cockroachdb-public") fi exec /cockroach/cockroach ${CRARGS[*]} # No pre-stop hook is required, a SIGTERM plus some time is all that's diff --git a/cloud/kubernetes/init/Dockerfile b/cloud/kubernetes/init/Dockerfile new file mode 100644 index 000000000000..4e204ffd9953 --- /dev/null +++ b/cloud/kubernetes/init/Dockerfile @@ -0,0 +1,6 @@ +FROM gcr.io/google_containers/peer-finder:0.1 + +ADD on-start.sh / +RUN chmod -c 755 /on-start.sh + +ENTRYPOINT ["/peer-finder"] diff --git a/cloud/kubernetes/init/README.md b/cloud/kubernetes/init/README.md new file mode 100644 index 000000000000..b5372add6a7e --- /dev/null +++ b/cloud/kubernetes/init/README.md @@ -0,0 +1,30 @@ +# Overview + +The Dockerfile in this directory defines a lightweight wrapper around the +[Kubernetes-maintained "peer-finder" +image](https://github.com/kubernetes/contrib/tree/master/pets/peer-finder), +which finds whether any other instances from the same PetSet currently exist in +the cluster. + +The `on-start.sh` script in this directory is invoked by the peer-finder binary +with a newline-separated list of the DNS results matching the provided +Kubernetes service name and namespace. + +We use this to try to help the first CockroachDB instance decide whether it +should try to join an existing cluster or initialize a new one. We have to be +very careful about initializing a new one, since doing so when one already +exists can cause some real problems. + +# Pushing a new version + +Assuming you're logged in to a Docker Hub account that can push to the +cockroachdb organization, [check the latest tag of the +cockroachdb/cockroach-k8s-init +container](https://hub.docker.com/r/cockroachdb/cockroach-k8s-init/tags/) so +that you know what tag number to use next, then cd to this directory and run: + +```shell +NEW_TAG=0.0 # replace 0.0 with the next appropriate tag number +docker build -t "cockroachdb/cockroach-k8s-init:${NEW_TAG}" . 
+docker push "cockroachdb/cockroach-k8s-init:${NEW_TAG}"
+```
diff --git a/cloud/kubernetes/init/on-start.sh b/cloud/kubernetes/init/on-start.sh
new file mode 100755
index 000000000000..51e93c9f22a7
--- /dev/null
+++ b/cloud/kubernetes/init/on-start.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Reads the peer list from stdin. If any peer other than this host is listed,
+# assume a cluster already exists and drop a marker file in the data volume
+# (mounted at /cockroach/cockroach-data) so that we don't create a new one.
+if grep -vF "$(hostname -f)"; then
+  mkdir -p /cockroach/cockroach-data && touch /cockroach/cockroach-data/cluster_exists_marker
+fi