From 6ed118bc91a1d94175716810754549c3ade61109 Mon Sep 17 00:00:00 2001 From: Gibbs Cullen Date: Fri, 8 May 2020 12:56:50 -0400 Subject: [PATCH] edits to operator section --- .../how_to_guides/monitoring_m3/_index.md | 2 +- docs-beta/content/how_to_guides/other/sql.md | 6 - .../architecture/coordinator.md | 3 +- .../configurations/annotated_config.md | 2 + .../apis/_index.md} | 4 +- .../configurations/apis/ingest.md | 6 + .../configurations/apis/operator.md | 175 ++++++++++++++ .../configurations/apis/query.md | 6 + .../configurations/operator/_index.md | 24 ++ .../operator/configuration/_index.md | 21 ++ .../operator/configuration/managing_nodes.md | 42 ++++ .../operator/configuration/namespace.md | 225 ++++++++++++++++++ .../operator/getting_started/_index.md | 22 ++ .../operator/getting_started/install.md | 20 ++ .../getting_started/managing_cluster.md} | 94 +------- .../operator/getting_started/monitoring.md | 11 + 16 files changed, 567 insertions(+), 96 deletions(-) delete mode 100644 docs-beta/content/how_to_guides/other/sql.md rename docs-beta/content/reference_docs/{apis.md => configurations/apis/_index.md} (99%) create mode 100644 docs-beta/content/reference_docs/configurations/apis/ingest.md create mode 100644 docs-beta/content/reference_docs/configurations/apis/operator.md create mode 100644 docs-beta/content/reference_docs/configurations/apis/query.md create mode 100644 docs-beta/content/reference_docs/configurations/operator/_index.md create mode 100644 docs-beta/content/reference_docs/configurations/operator/configuration/_index.md create mode 100644 docs-beta/content/reference_docs/configurations/operator/configuration/managing_nodes.md create mode 100644 docs-beta/content/reference_docs/configurations/operator/configuration/namespace.md create mode 100644 docs-beta/content/reference_docs/configurations/operator/getting_started/_index.md create mode 100644 docs-beta/content/reference_docs/configurations/operator/getting_started/install.md rename 
docs-beta/content/reference_docs/{operator.md => configurations/operator/getting_started/managing_cluster.md} (57%) create mode 100644 docs-beta/content/reference_docs/configurations/operator/getting_started/monitoring.md diff --git a/docs-beta/content/how_to_guides/monitoring_m3/_index.md b/docs-beta/content/how_to_guides/monitoring_m3/_index.md index b77984f37f..15fdd1f3cf 100644 --- a/docs-beta/content/how_to_guides/monitoring_m3/_index.md +++ b/docs-beta/content/how_to_guides/monitoring_m3/_index.md @@ -1,5 +1,5 @@ --- -title: "Monitoring_m3" +title: "Monitoring M3" date: 2020-04-21T20:56:58-04:00 draft: true --- diff --git a/docs-beta/content/how_to_guides/other/sql.md b/docs-beta/content/how_to_guides/other/sql.md deleted file mode 100644 index 8ae1d3ac11..0000000000 --- a/docs-beta/content/how_to_guides/other/sql.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: "Querying using SQL" -date: 2020-04-21T20:50:09-04:00 -draft: true ---- - diff --git a/docs-beta/content/reference_docs/architecture/coordinator.md b/docs-beta/content/reference_docs/architecture/coordinator.md index bd4007ed04..4fbe3ce66e 100644 --- a/docs-beta/content/reference_docs/architecture/coordinator.md +++ b/docs-beta/content/reference_docs/architecture/coordinator.md @@ -2,5 +2,4 @@ title: "M3 Coordinator" date: 2020-04-21T21:01:05-04:00 draft: true ---- - +--- \ No newline at end of file diff --git a/docs-beta/content/reference_docs/configurations/annotated_config.md b/docs-beta/content/reference_docs/configurations/annotated_config.md index e565d4cfbc..1ea54572aa 100644 --- a/docs-beta/content/reference_docs/configurations/annotated_config.md +++ b/docs-beta/content/reference_docs/configurations/annotated_config.md @@ -4,3 +4,5 @@ date: 2020-04-21T21:01:32-04:00 draft: true --- +Link to Yaml: https://github.com/chronosphereio/collector/blob/master/config/chronocollector/config.yml + diff --git a/docs-beta/content/reference_docs/apis.md 
b/docs-beta/content/reference_docs/configurations/apis/_index.md similarity index 99% rename from docs-beta/content/reference_docs/apis.md rename to docs-beta/content/reference_docs/configurations/apis/_index.md index 381dcf5a37..ebe4fb5781 100644 --- a/docs-beta/content/reference_docs/apis.md +++ b/docs-beta/content/reference_docs/configurations/apis/_index.md @@ -1,6 +1,6 @@ --- -title: "APIs" -date: 2020-04-21T21:02:36-04:00 +title: "APIs" +date: 2020-05-08T12:41:49-04:00 draft: true --- diff --git a/docs-beta/content/reference_docs/configurations/apis/ingest.md b/docs-beta/content/reference_docs/configurations/apis/ingest.md new file mode 100644 index 0000000000..d888da7c02 --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/apis/ingest.md @@ -0,0 +1,6 @@ +--- +title: "Ingest APIs" +date: 2020-05-08T12:42:14-04:00 +draft: true +--- + diff --git a/docs-beta/content/reference_docs/configurations/apis/operator.md b/docs-beta/content/reference_docs/configurations/apis/operator.md new file mode 100644 index 0000000000..f0d96bfe74 --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/apis/operator.md @@ -0,0 +1,175 @@ +--- +title: "Operator API" +date: 2020-05-08T12:42:20-04:00 +draft: true +--- + +API Docs +This document enumerates the Custom Resource Definitions used by the M3DB Operator. It is auto-generated from code comments. + +Table of Contents +ClusterCondition +ClusterSpec +IsolationGroup +M3DBCluster +M3DBClusterList +M3DBStatus +NodeAffinityTerm +IndexOptions +Namespace +NamespaceOptions +RetentionOptions +PodIdentity +PodIdentityConfig +ClusterCondition +ClusterCondition represents various conditions the cluster can be in. + +Field Description Scheme Required +type Type of cluster condition. ClusterConditionType false +status Status of the condition (True, False, Unknown). corev1.ConditionStatus false +lastUpdateTime Last time this condition was updated. 
string false +lastTransitionTime Last time this condition transitioned from one status to another. string false +reason Reason this condition last changed. string false +message Human-friendly message about this condition. string false +Back to TOC + +ClusterSpec +ClusterSpec defines the desired state for an M3 cluster to converge to. + +Field Description Scheme Required +image Image specifies which docker image to use with the cluster string false +replicationFactor ReplicationFactor defines how many replicas int32 false +numberOfShards NumberOfShards defines how many shards in total int32 false +isolationGroups IsolationGroups specifies a map of key-value pairs. Defines which isolation groups to deploy persistent volumes for data nodes []IsolationGroup false +namespaces Namespaces specifies the namespaces this cluster will hold. []Namespace false +etcdEndpoints EtcdEndpoints defines the etcd endpoints to use for service discovery. Must be set if no custom configmap is defined. If set, etcd endpoints will be templated in to the default configmap template. []string false +keepEtcdDataOnDelete KeepEtcdDataOnDelete determines whether the operator will remove cluster metadata (placement + namespaces) in etcd when the cluster is deleted. Unless true, etcd data will be cleared when the cluster is deleted. bool false +enableCarbonIngester EnableCarbonIngester enables the listener port for the carbon ingester bool false +configMapName ConfigMapName specifies the ConfigMap to use for this cluster. If unset a default configmap with template variables for etcd endpoints will be used. See \"Configuring M3DB\" in the docs for more. *string false +podIdentityConfig PodIdentityConfig sets the configuration for pod identity. If unset only pod name and UID will be used. *PodIdentityConfig false +containerResources Resources defines memory / cpu constraints for each container in the cluster. 
corev1.ResourceRequirements false +dataDirVolumeClaimTemplate DataDirVolumeClaimTemplate is the volume claim template for an M3DB instance's data. It claims PersistentVolumes for cluster storage; volumes are dynamically provisioned when the StorageClass is defined. *corev1.PersistentVolumeClaim false +podSecurityContext PodSecurityContext allows the user to specify an optional security context for pods. *corev1.PodSecurityContext false +securityContext SecurityContext allows the user to specify a container-level security context. *corev1.SecurityContext false +imagePullSecrets ImagePullSecrets will be added to every pod. []corev1.LocalObjectReference false +envVars EnvVars defines custom environment variables to be passed to M3DB containers. []corev1.EnvVar false +labels Labels sets the base labels that will be applied to resources created by the cluster. // TODO(schallert): design doc on labeling scheme. map[string]string false +annotations Annotations sets the base annotations that will be applied to resources created by the cluster. map[string]string false +tolerations Tolerations sets the tolerations that will be applied to all M3DB pods. []corev1.Toleration false +priorityClassName PriorityClassName sets the priority class for all M3DB pods. string false +nodeEndpointFormat NodeEndpointFormat allows overriding of the endpoint used for a node in the M3DB placement. Defaults to \"{{ .PodName }}.{{ .M3DBService }}:{{ .Port }}\". Useful if access to the cluster from other namespaces is desired. See \"Node Endpoint\" docs for full variables available. string false +hostNetwork HostNetwork indicates whether M3DB pods should run in the same network namespace as the node they are on. This option should be used sparingly due to security concerns outlined in the linked documentation. https://kubernetes.io/docs/concepts/policy/pod-security-policy/#host-namespaces bool false +dnsPolicy DNSPolicy allows the user to set the pod's DNSPolicy. 
This is often used in conjunction with HostNetwork. Optional. *corev1.DNSPolicy false +externalCoordinatorSelector Specify a \"controlling\" coordinator for the cluster. It is expected that there is a separate standalone coordinator cluster. It is externally managed - not managed by this operator. It is expected to have a service endpoint. Set up this db cluster, but do not assume a co-located coordinator. Instead provide a selector here so we can point to a separate coordinator service. Specify here the labels required for the selector. map[string]string false +initContainers Custom setup for db nodes can be done via initContainers. Provide the complete spec for the initContainer here. If any storage volumes are needed in the initContainer, see InitVolumes below. []corev1.Container false +initVolumes If the InitContainers require any storage volumes, provide the complete specification for the required Volumes here. []corev1.Volume false +podMetadata PodMetadata is for any Metadata that is unique to the pods, and does not belong on any other objects, such as Prometheus scrape tags metav1.ObjectMeta false +parallelPodManagement ParallelPodManagement sets StatefulSets created by the operator to have Parallel pod management instead of OrderedReady. This is an EXPERIMENTAL flag and subject to deprecation in a future release. This has not been tested in production and users should not depend on it without validating it for their own use case. bool true +Back to TOC + +IsolationGroup +IsolationGroup defines the name of a zone as well as attributes for the zone configuration + +Field Description Scheme Required +name Name is the value that will be used in StatefulSet labels, pod labels, and M3DB placement \"isolationGroup\" fields. string true +nodeAffinityTerms NodeAffinityTerms is an array of NodeAffinityTerm requirements, which are ANDed together to indicate what nodes an isolation group can be assigned to. 
[]NodeAffinityTerm false +numInstances NumInstances defines the number of instances. int32 true +storageClassName StorageClassName is the name of the StorageClass to use for this isolation group. This allows ensuring that PVs will be created in the same zone as the pinned statefulset on Kubernetes < 1.12 (when topology aware volume scheduling was introduced). Only has effect if the cluster's dataDirVolumeClaimTemplate is non-nil. If set, the volume claim template will have its storageClassName field overridden per-isolationgroup. If unset the storageClassName of the volumeClaimTemplate will be used. string false +Back to TOC + +M3DBCluster +M3DBCluster defines the cluster + +Field Description Scheme Required +metadata metav1.ObjectMeta false +type string true +spec ClusterSpec true +status M3DBStatus false +Back to TOC + +M3DBClusterList +M3DBClusterList represents a list of M3DB Clusters + +Field Description Scheme Required +metadata metav1.ListMeta false +items []M3DBCluster true +Back to TOC + +M3DBStatus +M3DBStatus contains the current state of the M3DB cluster along with a human readable message + +Field Description Scheme Required +state State is an enum of green, yellow, and red denoting the health of the cluster M3DBState false +conditions Various conditions about the cluster. []ClusterCondition false +message Message is a human readable message indicating why the cluster is in its current state string false +observedGeneration ObservedGeneration is the last generation of the cluster the controller observed. Kubernetes will automatically increment metadata.Generation every time the cluster spec is changed. int64 false +Back to TOC + +NodeAffinityTerm +NodeAffinityTerm represents a node label and a set of label values, any of which can be matched to assign a pod to a node. + +Field Description Scheme Required +key Key is the label of the node. string true +values Values is an array of values, any of which a node can have for a pod to be assigned to it. 
[]string true +Back to TOC + +IndexOptions +IndexOptions defines parameters for indexing. + +Field Description Scheme Required +enabled Enabled controls whether metric indexing is enabled. bool false +blockSize BlockSize controls the index block size. string false +Back to TOC + +Namespace +Namespace defines an M3DB namespace or points to a preset M3DB namespace. + +Field Description Scheme Required +name Name is the namespace name. string false +preset Preset indicates preset namespace options. string false +options Options points to optional custom namespace configuration. *NamespaceOptions false +Back to TOC + +NamespaceOptions +NamespaceOptions defines parameters for an M3DB namespace. See https://m3db.github.io/m3/operational_guide/namespace_configuration/ for more details. + +Field Description Scheme Required +bootstrapEnabled BootstrapEnabled controls whether bootstrapping is enabled. bool false +flushEnabled FlushEnabled controls whether flushing is enabled. bool false +writesToCommitLog WritesToCommitLog controls whether commit log writes are enabled. bool false +cleanupEnabled CleanupEnabled controls whether cleanups are enabled. bool false +repairEnabled RepairEnabled controls whether repairs are enabled. bool false +snapshotEnabled SnapshotEnabled controls whether snapshotting is enabled. bool false +retentionOptions RetentionOptions sets the retention parameters. RetentionOptions false +indexOptions IndexOptions sets the indexing parameters. IndexOptions false +Back to TOC + +RetentionOptions +RetentionOptions defines parameters for data retention. + +Field Description Scheme Required +retentionPeriod RetentionPeriod controls how long data for the namespace is retained. string false +blockSize BlockSize controls the block size for the namespace. string false +bufferFuture BufferFuture controls how far in the future metrics can be written. string false +bufferPast BufferPast controls how far in the past metrics can be written. 
string false +blockDataExpiry BlockDataExpiry controls the block expiry. bool false +blockDataExpiryAfterNotAccessPeriod BlockDataExpiryAfterNotAccessPeriod controls the not after access period for expiration. string false +Back to TOC + +PodIdentity +PodIdentity contains all the fields that may be used to identify a pod's identity in the M3DB placement. Any non-empty fields will be used to identify the uniqueness of a pod for the purpose of M3DB replace operations. + +Field Description Scheme Required +name string false +uid string false +nodeName string false +nodeExternalID string false +nodeProviderID string false +Back to TOC + +PodIdentityConfig +PodIdentityConfig contains cluster-level configuration for deriving pod identity. + +Field Description Scheme Required +sources Sources enumerates the sources from which to derive pod identity. Note that a pod's name will always be used. If empty, defaults to pod name and UID. []PodIdentitySource true +Back to TOC \ No newline at end of file diff --git a/docs-beta/content/reference_docs/configurations/apis/query.md b/docs-beta/content/reference_docs/configurations/apis/query.md new file mode 100644 index 0000000000..5b7b5c402e --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/apis/query.md @@ -0,0 +1,6 @@ +--- +title: "Query APIs" +date: 2020-05-08T12:42:09-04:00 +draft: true +--- + diff --git a/docs-beta/content/reference_docs/configurations/operator/_index.md b/docs-beta/content/reference_docs/configurations/operator/_index.md new file mode 100644 index 0000000000..79ce8e30c0 --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/operator/_index.md @@ -0,0 +1,24 @@ +--- +title: "Operator" +date: 2020-05-08T12:43:53-04:00 +draft: true +--- + +Introduction +Welcome to the documentation for the M3DB operator, a Kubernetes operator for running the open-source timeseries database M3DB on Kubernetes. 
+ +Please note that this is alpha software, and as such its APIs and behavior are subject to breaking changes. 
While we aim to produce thoroughly tested reliable software, there may be undiscovered bugs. + +For more background on the M3DB operator, see our KubeCon keynote on its origins and usage at Uber. + +Philosophy +The M3DB operator aims to automate everyday tasks around managing M3DB. Specifically, it aims to automate: + +Creating M3DB clusters +Destroying M3DB clusters +Expanding clusters (adding instances) +Shrinking clusters (removing instances) +Replacing failed instances +It explicitly does not try to automate every single edge case a user may ever run into. For example, it does not aim to automate disaster recovery if an entire cluster is taken down. Such use cases may still require human intervention, but the operator will aim to not conflict with such operations a human may have to take on a cluster. + +Generally speaking, the operator's philosophy is if it would be unclear to a human what action to take, we will not try to guess. \ No newline at end of file diff --git a/docs-beta/content/reference_docs/configurations/operator/configuration/_index.md b/docs-beta/content/reference_docs/configurations/operator/configuration/_index.md new file mode 100644 index 0000000000..aca74e5688 --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/operator/configuration/_index.md @@ -0,0 +1,21 @@ +--- +title: "Configuration" +date: 2020-05-08T12:49:38-04:00 +draft: true +--- + +Configuring M3DB +By default the operator will apply a configmap with basic M3DB options and settings for the coordinator to direct Prometheus reads/writes to the cluster. This template can be found here. + +To apply a custom configuration for the M3DB cluster, one can set the configMapName parameter of the cluster spec to an existing configmap. + +Environment Warning +If providing a custom config map, the env you specify in your config must be $NAMESPACE/$NAME, where $NAMESPACE is the Kubernetes namespace your cluster is in and $NAME is the name of the cluster. 
For example, with the following cluster: + +apiVersion: operator.m3db.io/v1alpha1 +kind: M3DBCluster +metadata: + name: cluster-a + namespace: production +... +The value of env in your config MUST be production/cluster-a. This restriction allows multiple M3DB clusters to safely share the same etcd cluster. \ No newline at end of file diff --git a/docs-beta/content/reference_docs/configurations/operator/configuration/managing_nodes.md b/docs-beta/content/reference_docs/configurations/operator/configuration/managing_nodes.md new file mode 100644 index 0000000000..770df5ba2f --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/operator/configuration/managing_nodes.md @@ -0,0 +1,42 @@ +--- +title: "Managing nodes" +date: 2020-05-08T12:47:10-04:00 +draft: true +--- + +Pod Identity +Motivation +M3DB assumes that if a process is started and owns sealed shards marked as Available that its data for those shards is valid and does not have to be fetched from peers. Consequently, this means it will begin serving reads for that data. For more background on M3DB topology, see the M3DB topology docs. + +In most environments in which M3DB has been deployed in production, it has been on a set of hosts predetermined by whoever is managing the cluster. This means that an M3DB instance is identified in a topology by its hostname, and that when an M3DB process comes up and finds its hostname in the cluster with Available shards that it can serve reads for those shards. + +This does not work on Kubernetes, particularly when working with StatefulSets, as a pod may be rescheduled on a new node or with new storage attached but its name may stay the same. If we were to naively use an instance's hostname (pod name), and it were to get rescheduled on a new node with no data, it could assume that absence of data is valid and begin returning empty results for read requests. 
+ +To account for this, the M3DB Operator determines an M3DB instance's identity in the topology based on a configurable set of metadata about the pod. + +Configuration +The M3DB operator uses a configurable set of metadata about a pod to determine its identity in the M3DB placement. This is encapsulated in the PodIdentityConfig field of a cluster's spec. In addition to the configured sources, a pod's name will always be included. + +Every pod in an M3DB cluster is annotated with its identity and is passed to the M3DB instance via a downward API volume. + +Sources +This section will be filled out as a number of pending PRs land. + +Recommendations +No Persistent Storage +If not using PVs, you should set sources to PodUID: + +podIdentityConfig: + sources: + - PodUID +This way whenever a container is rescheduled, the operator will initiate a replace and it will stream data from its peers before serving reads. Note that not having persistent storage is not a recommended way to run M3DB. + +Remote Persistent Storage +If using remote storage you do not need to set sources, as it will default to just the pod's name. The data for an M3DB instance will move around with its container. + +Local Persistent Storage +If using persistent local volumes, you should set sources to NodeName. In this configuration M3DB will consider a pod to be the same so long as it's on the same node. Replaces will only be triggered if a pod with the same name is moved to a new host. + +Note that if using local SSDs on GKE, node names may stay the same even though a VM has been recreated. We also support ProviderID, which will use the underlying VM's unique ID number in GCE to identify host uniqueness. 
+ + diff --git a/docs-beta/content/reference_docs/configurations/operator/configuration/namespace.md b/docs-beta/content/reference_docs/configurations/operator/configuration/namespace.md new file mode 100644 index 0000000000..2bddf07502 --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/operator/configuration/namespace.md @@ -0,0 +1,225 @@ +--- +title: "Namespace" +date: 2020-05-08T12:46:59-04:00 +draft: true +--- + +Namespaces +M3DB uses the concept of namespaces to determine how metrics are stored and retained. The M3DB operator allows a user to define their own namespaces, or to use a set of presets we consider to be suitable for production use cases. + +Namespaces are configured as part of an m3dbcluster spec. + +Presets +10s:2d +This preset will store metrics at 10 second resolution for 2 days. For example, in your cluster spec: + +spec: +... + namespaces: + - name: metrics-short-term + preset: 10s:2d +1m:40d +This preset will store metrics at 1 minute resolution for 40 days. + +spec: +... + namespaces: + - name: metrics-long-term + preset: 1m:40d +Custom Namespaces +You can also define your own custom namespaces by setting the NamespaceOptions within a cluster spec. The API lists all available fields. As an example, a namespace to store 7 days of data may look like: + +... +spec: +... 
+ namespaces: + - name: custom-7d + options: + bootstrapEnabled: true + flushEnabled: true + writesToCommitLog: true + cleanupEnabled: true + snapshotEnabled: true + repairEnabled: false + retentionOptions: + retentionPeriod: 168h + blockSize: 12h + bufferFuture: 20m + bufferPast: 20m + blockDataExpiry: true + blockDataExpiryAfterNotAccessPeriod: 5m + indexOptions: + enabled: true + blockSize: 12h + + +Node Affinity & Cluster Topology +Node Affinity +Kubernetes allows pods to be assigned to nodes based on various criteria through node affinity. + +M3DB was built with failure tolerance as a core feature. 
M3DB's isolation groups allow shards to be placed across failure domains such that the loss of no single domain can cause the cluster to lose quorum. More details on M3DB's resiliency can be found in the deployment docs. + +By leveraging Kubernetes' node affinity and M3DB's isolation groups, the operator can guarantee that M3DB pods are distributed across failure domains. For example, in a Kubernetes cluster spread across 3 zones in a cloud region, the isolationGroups configuration below would guarantee that no single zone failure could degrade the M3DB cluster. + +M3DB is unaware of the underlying zone topology: it just views the isolation groups as group1, group2, group3 in its placement. Thanks to the Kubernetes scheduler, however, these groups are actually scheduled across separate failure domains. + +apiVersion: operator.m3db.io/v1alpha1 +kind: M3DBCluster +... +spec: + replicationFactor: 3 + isolationGroups: + - name: group1 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-b + - name: group2 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-c + - name: group3 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-d +Tolerations +In addition to allowing pods to be assigned to certain nodes via node affinity, Kubernetes allows pods to be repelled from nodes through taints if they don't tolerate the taint. For example, the following config would ensure: + +Pods are spread across zones. + +Pods are only assigned to nodes in the m3db-dedicated-pool pool. + +No other pods could be assigned to those nodes (assuming they were tainted with the taint m3db-dedicated-taint). + +apiVersion: operator.m3db.io/v1alpha1 +kind: M3DBCluster +... 
+spec: + replicationFactor: 3 + isolationGroups: + - name: group1 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-b + - key: nodepool + values: + - m3db-dedicated-pool + - name: group2 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-c + - key: nodepool + values: + - m3db-dedicated-pool + - name: group3 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-d + - key: nodepool + values: + - m3db-dedicated-pool + tolerations: + - key: m3db-dedicated + effect: NoSchedule + operator: Exists +Example Affinity Configurations +Zonal Cluster +The examples so far have focused on multi-zone Kubernetes clusters. Some users may only have a cluster in a single zone and accept the reduced fault tolerance. The following configuration shows how to configure the operator in a zonal cluster. + +apiVersion: operator.m3db.io/v1alpha1 +kind: M3DBCluster +... +spec: + replicationFactor: 3 + isolationGroups: + - name: group1 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-b + - name: group2 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-b + - name: group3 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-b +6 Zone Cluster +In the above examples we created clusters with 1 isolation group in each of 3 zones. Because values within a single NodeAffinityTerm are OR'd, we can also spread an isolationgroup across multiple zones. For example, if we had 6 zones available to us: + +apiVersion: operator.m3db.io/v1alpha1 +kind: M3DBCluster +... 
+spec: + replicationFactor: 3 + isolationGroups: + - name: group1 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-a + - us-east1-b + - name: group2 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-c + - us-east1-d + - name: group3 + numInstances: 3 + nodeAffinityTerms: + - key: failure-domain.beta.kubernetes.io/zone + values: + - us-east1-e + - us-east1-f +No Affinity +If there are no failure domains available, one can have a cluster with no affinity where the pods will be scheduled however Kubernetes would place them by default: + +apiVersion: operator.m3db.io/v1alpha1 +kind: M3DBCluster +... +spec: + replicationFactor: 3 + isolationGroups: + - name: group1 + numInstances: 3 + - name: group2 + numInstances: 3 + - name: group3 + numInstances: 3 + +Node Endpoint +M3DB stores an endpoint field on placement instances that is used for communication between DB nodes and from other components such as the coordinator. + +The operator allows customizing the format of this endpoint by setting the nodeEndpointFormat field on a cluster spec. 
The format of this field uses Go templates, with the following template fields currently supported: + +Field Description +PodName Name of the pod +M3DBService Name of the generated M3DB service +PodNamespace Namespace the pod is in +Port Port M3DB is serving RPCs on +The default format is: + +{{ .PodName }}.{{ .M3DBService }}:{{ .Port }} +As an example of an override, to expose an M3DB cluster to containers in other Kubernetes namespaces nodeEndpointFormat can be set to: + +{{ .PodName }}.{{ .M3DBService }}.{{ .PodNamespace }}:{{ .Port }} \ No newline at end of file diff --git a/docs-beta/content/reference_docs/configurations/operator/getting_started/_index.md b/docs-beta/content/reference_docs/configurations/operator/getting_started/_index.md new file mode 100644 index 0000000000..62e7b28ea8 --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/operator/getting_started/_index.md @@ -0,0 +1,22 @@ +--- +title: "Getting Started" +date: 2020-05-08T12:49:48-04:00 +draft: true +--- + +Requirements +Kubernetes Versions +The M3DB operator currently targets Kubernetes 1.11 and 1.12. Given the operator's current production use cases at Uber, we typically target the two most recent minor Kubernetes versions supported by GKE. We welcome community contributions to support more recent versions while meeting the aforementioned GKE targets! + +Multi-Zone Kubernetes Cluster +The M3DB operator is intended to be used with Kubernetes clusters that span at least 3 zones within a region to create highly available clusters and maintain quorum in the event of zone failures. Instructions for creating regional clusters on GKE can be found here. + +Etcd +M3DB stores its cluster topology and all other runtime metadata in etcd. + +For testing / non-production use cases, we provide simple manifests for running etcd on Kubernetes in our example manifests: one for running ephemeral etcd containers and one for running etcd using basic persistent volumes. 
If using the etcd-pd yaml manifest, we recommend a modification to use a StorageClass equivalent to your cloud provider's fastest remote disk (such as pd-ssd on GCP). + +For production use cases, we recommend running etcd (in order of preference): + +External to your Kubernetes cluster to avoid circular dependencies. +Using the etcd operator. \ No newline at end of file diff --git a/docs-beta/content/reference_docs/configurations/operator/getting_started/install.md b/docs-beta/content/reference_docs/configurations/operator/getting_started/install.md new file mode 100644 index 0000000000..dd90b0a9bf --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/operator/getting_started/install.md @@ -0,0 +1,20 @@ +--- +title: "Install" +date: 2020-05-08T12:46:04-04:00 +draft: true +--- + +Installation +Be sure to take a look at the requirements before installing the operator. + +Helm +Add the m3db-operator repo: +helm repo add m3db https://m3-helm-charts.storage.googleapis.com/stable +Install the m3db-operator chart: +helm install m3db/m3db-operator --namespace m3db-operator +Note: If uninstalling an instance of the operator that was installed with Helm, some resources such as the ClusterRole, ClusterRoleBinding, and ServiceAccount may need to be deleted manually. 
+ +Manually +Install the bundled operator manifests in the current namespace: + +kubectl apply -f https://raw.githubusercontent.com/m3db/m3db-operator/master/bundle.yaml diff --git a/docs-beta/content/reference_docs/operator.md b/docs-beta/content/reference_docs/configurations/operator/getting_started/managing_cluster.md similarity index 57% rename from docs-beta/content/reference_docs/operator.md rename to docs-beta/content/reference_docs/configurations/operator/getting_started/managing_cluster.md index c04dfc1106..194250bb80 100644 --- a/docs-beta/content/reference_docs/operator.md +++ b/docs-beta/content/reference_docs/configurations/operator/getting_started/managing_cluster.md @@ -1,74 +1,21 @@ --- -title: "M3Operator" -date: 2020-04-21T21:02:41-04:00 +title: "Managing cluster" +date: 2020-05-08T12:46:31-04:00 draft: true --- -### Requirements -#### Kubernetes Versions -The M3DB operator current targets Kubernetes 1.11 and 1.12. Given the operator's current production use cases at Uber, we typically target the two most recent minor Kubernetes versions supported by GKE. We welcome community contributions to support more recent versions while meeting the aforementioned GKE targets! - -#### Multi-Zone Kubernetes Cluster -The M3DB operator is intended to be used with Kubernetes clusters that span at least 3 zones within a region to create highly available clusters and maintain quorum in the event of region failures. Instructions for creating regional clusters on GKE can be found here. - -#### Etcd -M3DB stores its cluster topology and all other runtime metadata in etcd. - -For testing / non-production use cases, we provide simple manifests for running etcd on Kubernetes in our example manifests: one for running ephemeral etcd containers and one for running etcd using basic persistent volumes. If using the etcd-pd yaml manifest, we recommend a modification to use a StorageClass equivalent to your cloud provider's fastest remote disk (such as pd-ssd on GCP). 
- -For production use cases, we recommend running etcd (in order of preference): - -External to your Kubernetes cluster to avoid circular dependencies. -Using the etcd operator. - - -### Introduction -Welcome to the documentation for the M3DB operator, a Kubernetes operator for running the open-source timeseries database M3DB on Kubernetes. - -Please note that this is alpha software, and as such its APIs and behavior are subject to breaking changes. While we aim to produce thoroughly tested reliable software there may be undiscovered bugs. - -For more background on the M3DB operator, see our KubeCon keynote on its origins and usage at Uber. - -#### Philosophy -The M3DB operator aims to automate everyday tasks around managing M3DB. Specifically, it aims to automate: - -Creating M3DB clusters -Destroying M3DB clusters -Expanding clusters (adding instances) -Shrinking clusters (removing instances) -Replacing failed instances -It explicitly does not try to automate every single edge case a user may ever run into. For example, it does not aim to automate disaster recovery if an entire cluster is taken down. Such use cases may still require human intervention, but the operator will aim to not conflict with such operations a human may have to take on a cluster. - -Generally speaking, the operator's philosophy is if it would be unclear to a human what action to take, we will not try to guess. - -#### Installation -Be sure to take a look at the requirements before installing the operator. - -#### Helm -Add the m3db-operator repo: -helm repo add m3db https://m3-helm-charts.storage.googleapis.com/stable -Install the m3db-operator chart: -helm install m3db/m3db-operator --namespace m3db-operator -Note: If uninstalling an instance of the operator that was installed with Helm, some resources such as the ClusterRole, ClusterRoleBinding, and ServiceAccount may need to be deleted manually. 
- -#### Manually -Install the bundled operator manifests in the current namespace: - -kubectl apply -f https://raw.githubusercontent.com/m3db/m3db-operator/master/bundle.yaml - -#### Creating a Cluster +Creating a Cluster Once you've installed the M3DB operator and read over the requirements, you can start creating some M3DB clusters! -#### Basic Cluster +Basic Cluster The following creates an M3DB cluster spread across 3 zones, with each M3DB instance being able to store up to 350gb of data using your Kubernetes cluster's default storage class. For examples of different cluster topologies, such as zonal clusters, see the docs on node affinity. -#### Etcd +Etcd Create an etcd cluster with persistent volumes: kubectl apply -f https://raw.githubusercontent.com/m3db/m3db-operator/v0.6.0/example/etcd/etcd-pd.yaml We recommend modifying the storageClassName in the manifest to one that matches your cloud provider's fastest remote storage option, such as pd-ssd on GCP. -### M3DB apiVersion: operator.m3db.io/v1alpha1 kind: M3DBCluster @@ -117,10 +64,10 @@ spec: storage: 350Gi limits: storage: 350Gi -#### Ephemeral Cluster +Ephemeral Cluster WARNING: This setup is not intended for production-grade clusters, but rather for "kicking the tires" with the operator and M3DB. It is intended to work across almost any Kubernetes environment, and as such has as few dependencies as possible (namely persistent storage). See below for instructions on creating a more durable cluster. -### Etcd +Etcd Create an etcd cluster in the same namespace your M3DB cluster will be created in. 
If you don't have persistent storage available, this will create a cluster that will not use persistent storage and will likely become unavailable if any of the pods die: kubectl apply -f https://raw.githubusercontent.com/m3db/m3db-operator/v0.6.0/example/etcd/etcd-basic.yaml @@ -189,8 +136,7 @@ We can verify that the cluster has finished streaming data by peers by checking $ kubectl exec simple-cluster-rep2-0 -- curl -sSf localhost:9002/health {"ok":true,"status":"up","bootstrapped":true} - -### Deleting a Cluster +Deleting a Cluster Delete your M3DB cluster with kubectl: kubectl delete m3dbcluster simple-cluster @@ -198,7 +144,7 @@ By default, the operator will delete the placement and namespaces associated wit Under the hood, the operator uses Kubernetes finalizers to ensure the cluster CRD is not deleted until the operator has had a chance to do cleanup. -### Debugging Stuck Cluster Deletion +Debugging Stuck Cluster Deletion If for some reason the operator is unable to delete the placement and namespace for the cluster, the cluster CRD itself will be stuck in a state where it can not be deleted, due to the way finalizers work in Kubernetes. The operator might be unable to clean up the data for many reasons, for example if the M3DB cluster itself is not available to serve the APIs for cleanup or if etcd is down and cannot fulfill the deleted. To allow the CRD to be deleted, you can kubectl edit m3dbcluster $CLUSTER and remove the operator.m3db.io/etcd-deletion finalizer. For example, in the following cluster you'd remove the finalizer from metadata.finalizers: @@ -215,25 +161,3 @@ Note that if you do this, you'll have to manually remove the relevant data in et _sd.placement/$NS/$CLUSTER/m3db _kv/$NS/$CLUSTER/m3db.node.namespaces - -### Monitoring -M3DB exposes metrics via a Prometheus endpoint. 
If using the Prometheus Operator, you can apply a ServiceMonitor to have your M3DB pods automatically scraped by Prometheus: - -kubectl apply -f https://raw.githubusercontent.com/m3db/m3db-operator/master/example/prometheus-servicemonitor.yaml - -### Configuring M3DB -By default the operator will apply a configmap with basic M3DB options and settings for the coordinator to direct Prometheus reads/writes to the cluster. This template can be found here. - -To apply custom a configuration for the M3DB cluster, one can set the configMapName parameter of the cluster spec to an existing configmap. - -### Environment Warning -If providing a custom config map, the env you specify in your config must be $NAMESPACE/$NAME, where $NAMESPACE is the Kubernetes namespace your cluster is in and $NAME is the name of the cluster. For example, with the following cluster: - -apiVersion: operator.m3db.io/v1alpha1 -kind: M3DBCluster -metadata: - name: cluster-a - namespace: production -... -The value of env in your config MUST be production/cluster-a. This restriction allows multiple M3DB clusters to safely share the same etcd cluster. - diff --git a/docs-beta/content/reference_docs/configurations/operator/getting_started/monitoring.md b/docs-beta/content/reference_docs/configurations/operator/getting_started/monitoring.md new file mode 100644 index 0000000000..ef9b3ce37d --- /dev/null +++ b/docs-beta/content/reference_docs/configurations/operator/getting_started/monitoring.md @@ -0,0 +1,11 @@ +--- +title: "Monitoring" +date: 2020-05-08T12:46:15-04:00 +draft: true +--- + +Monitoring +M3DB exposes metrics via a Prometheus endpoint. If using the Prometheus Operator, you can apply a ServiceMonitor to have your M3DB pods automatically scraped by Prometheus: + +kubectl apply -f https://raw.githubusercontent.com/m3db/m3db-operator/master/example/prometheus-servicemonitor.yaml +You can visit the "targets" page of the Prometheus UI to verify the pods are being scraped. 
To view these metrics using Grafana, follow the M3 docs to install the M3DB Grafana dashboard. \ No newline at end of file