diff --git a/docs/stability-test-cookbook.md b/docs/stability-test-cookbook.md new file mode 100644 index 00000000000..e163811d0b3 --- /dev/null +++ b/docs/stability-test-cookbook.md @@ -0,0 +1,54 @@ +# Stability Test Cookbook + +> Important note: this guide is under heavy development and has complicated environment prerequisites; things are likely to change in the future. + +The following commands assume you are in the `tidb-operator` working directory: +```shell +# image will be tagged as YOUR_DOCKER_REGISTRY/pingcap/tidb-operator-stability-test:latest +$ export DOCKER_REGISTRY=${YOUR_DOCKER_REGISTRY} +$ make stability-test-push +$ kubectl apply -f ./tests/manifests/stability/stability-configmap.yaml +# edit the stability.yaml and change .spec.template.spec.containers[].image to the pushed image +$ vi ./tests/manifests/stability/stability.yaml +# apply the stability test pod +$ kubectl apply -f ./tests/manifests/stability/stability.yaml +``` + +## Alternative: run stability test in your local environment + +The deploy & witness flow can be tedious when developing stability-test; this document introduces how to run stability-test out of the cluster (usually on your local machine) while still operating on the remote cluster. + +### TL;DR: +```shell +$ telepresence --new-deployment ${POD_NAME} +$ go build -o stability ./tests/cmd/stability/main.go +$ ./stability --operator-repo-dir=${ARBITRARY_EMPTY_DIR_TO_CLONE_OPERATOR_REPO} --kubeconfig=${YOUR_KUBE_CONFIG_PATH} +``` + +### Explained + +Generally we have three problems to solve: + +1. **Out of cluster client**: Now we try to load configs in the following order: + * if the `kubeconfig` command line option is provided, use it + * if the `KUBECONFIG` env variable is set, use it + * try loading `InClusterConfig()` +So you have to specify the `kubeconfig` path via either the command line option or the env variable if you want to test locally. +2. 
**Privilege issue**: If you don't want to or cannot run the stability test with root privileges, change the working dir or create it in advance: + * the git repo dir can be overridden by the option `--operator-repo-dir=xxxx`, but the helm dir must be created manually. +```shell +# helm dir +$ mkdir /charts +$ chmod 777 /charts +# git repo dir if you don't set the command line option +$ mkdir /tidb-operator +$ chmod 777 /tidb-operator +``` +3. **DNS and network issue**: Two-way proxy using Telepresence. We cannot easily resolve cluster DNS names or access cluster IPs from outside the cluster; `telepresence` helps with that: it creates a proxy pod in the cluster and opens a VPN connection to the Kubernetes cluster via this pod. Just run ([full documentation](https://www.telepresence.io/reference/install)): +```shell +$ brew cask install osxfuse +$ brew install datawire/blackbird/telepresence +$ telepresence --new-deployment ${POD_NAME} +``` +**PS**: If you cannot resolve cluster DNS names after setup, try clearing the DNS cache. +**PPS**: Typically you can't use telepresence VPN mode with other VPNs (of course SSR is ok). 
diff --git a/tests/actions.go b/tests/actions.go index 301663a12cb..02af71ef7aa 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -234,10 +234,10 @@ func (oi *OperatorConfig) OperatorHelmSetString(m map[string]string) string { func (oa *operatorActions) DeployOperator(info *OperatorConfig) error { if info.Tag != "e2e" { - if err := cloneOperatorRepo(); err != nil { + if err := oa.cloneOperatorRepo(); err != nil { return err } - if err := checkoutTag(info.Tag); err != nil { + if err := oa.checkoutTag(info.Tag); err != nil { return err } } @@ -280,7 +280,7 @@ func (oa *operatorActions) CleanOperatorOrDie(info *OperatorConfig) { } func (oa *operatorActions) UpgradeOperator(info *OperatorConfig) error { - if err := checkoutTag(info.Tag); err != nil { + if err := oa.checkoutTag(info.Tag); err != nil { return err } @@ -1318,8 +1318,8 @@ func releaseIsNotFound(err error) bool { return strings.Contains(err.Error(), "not found") } -func cloneOperatorRepo() error { - cmd := fmt.Sprintf("git clone https://github.com/pingcap/tidb-operator.git /tidb-operator") +func (oa *operatorActions) cloneOperatorRepo() error { + cmd := fmt.Sprintf("git clone https://github.com/pingcap/tidb-operator.git %s", oa.cfg.OperatorRepoDir) glog.Info(cmd) res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil && !strings.Contains(string(res), "already exists") { @@ -1329,15 +1329,15 @@ func cloneOperatorRepo() error { return nil } -func checkoutTag(tagName string) error { - cmd := fmt.Sprintf(`cd /tidb-operator && +func (oa *operatorActions) checkoutTag(tagName string) error { + cmd := fmt.Sprintf(`cd %s && git stash -u && git checkout %s && mkdir -p /charts/%s && cp -rf charts/tidb-operator /charts/%s/tidb-operator && cp -rf charts/tidb-cluster /charts/%s/tidb-cluster && cp -rf charts/tidb-backup /charts/%s/tidb-backup`, - tagName, tagName, tagName, tagName, tagName) + oa.cfg.OperatorRepoDir, tagName, tagName, tagName, tagName, tagName) glog.Info(cmd) res, err := 
exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil { diff --git a/tests/cmd/stability/main.go b/tests/cmd/stability/main.go index f3c105bf670..e9a5733eefa 100644 --- a/tests/cmd/stability/main.go +++ b/tests/cmd/stability/main.go @@ -21,17 +21,16 @@ import ( "github.com/golang/glog" "github.com/jinzhu/copier" + "github.com/pingcap/tidb-operator/tests/pkg/client" "k8s.io/apiserver/pkg/util/logs" "github.com/pingcap/tidb-operator/tests" "github.com/pingcap/tidb-operator/tests/backup" - "github.com/pingcap/tidb-operator/tests/pkg/client" ) func main() { logs.InitLogs() defer logs.FlushLogs() - go func() { glog.Info(http.ListenAndServe("localhost:6060", nil)) }() diff --git a/tests/config.go b/tests/config.go index ff182258c79..7af7be0a73e 100644 --- a/tests/config.go +++ b/tests/config.go @@ -7,7 +7,7 @@ import ( "strings" "github.com/golang/glog" - yaml "gopkg.in/yaml.v2" + "gopkg.in/yaml.v2" ) // Config defines the config of operator tests @@ -22,6 +22,9 @@ type Config struct { Nodes []Nodes `yaml:"nodes" json:"nodes"` ETCDs []Nodes `yaml:"etcds" json:"etcds"` APIServers []Nodes `yaml:"apiservers" json:"apiservers"` + + // For local test + OperatorRepoDir string `yaml:"operator_repo_dir" json:"operator_repo_dir"` } // Nodes defines a series of nodes that belong to the same physical node. 
@@ -39,6 +42,8 @@ func NewConfig() *Config { flag.StringVar(&cfg.TidbVersions, "tidb-versions", "v2.1.3,v2.1.4", "tidb versions") flag.StringVar(&cfg.OperatorTag, "operator-tag", "master", "operator tag used to choose charts") flag.StringVar(&cfg.OperatorImage, "operator-image", "pingcap/tidb-operator:latest", "operator image") + flag.StringVar(&cfg.OperatorRepoDir, "operator-repo-dir", "/tidb-operator", "local directory to which tidb-operator cloned") + flag.Parse() return cfg } diff --git a/tests/pkg/client/client.go b/tests/pkg/client/client.go index 9c9e6f4354e..080d07d79aa 100644 --- a/tests/pkg/client/client.go +++ b/tests/pkg/client/client.go @@ -1,6 +1,9 @@ package client import ( + "flag" + "fmt" + "os" "time" "github.com/juju/errors" @@ -11,31 +14,45 @@ import ( "k8s.io/client-go/tools/clientcmd" ) +var ( + masterUrl string + kubeconfigPath string +) + +func init() { + flag.StringVar(&kubeconfigPath, "kubeconfig", "", + "path to a kubeconfig. Only required if out-of-cluster.") + flag.StringVar(&masterUrl, "master", "", + "address of the Kubernetes API server. Overrides any value in kubeconfig. 
"+ + "Only required if out-of-cluster.") +} + func NewCliOrDie() (versioned.Interface, kubernetes.Interface) { - cfg, err := rest.InClusterConfig() + cfg, err := GetConfig() if err != nil { panic(err) } - cfg.Timeout = 30 * time.Second - cli, err := versioned.NewForConfig(cfg) - if err != nil { - panic(err) - } + return buildClientsOrDie(cfg) +} - kubeCli, err := kubernetes.NewForConfig(cfg) - if err != nil { - panic(err) +func GetConfig() (*rest.Config, error) { + // If kubeconfigPath provided, use that + if len(kubeconfigPath) > 0 { + return clientcmd.BuildConfigFromFlags(masterUrl, kubeconfigPath) + } + // If an env variable is specified with the config locaiton, use that + if len(os.Getenv("KUBECONFIG")) > 0 { + return clientcmd.BuildConfigFromFlags(masterUrl, os.Getenv("KUBECONFIG")) + } + // If no explicit location, try the in-cluster config + if c, err := rest.InClusterConfig(); err == nil { + return c, nil } - return cli, kubeCli + return nil, fmt.Errorf("could not locate a kubeconfig") } -var ( - masterUrl string - kubeconfigPath string -) - type Client interface { kubernetes.Interface PingcapV1alpha1() v1alpha1.PingcapV1alpha1Interface @@ -74,3 +91,18 @@ func LoadConfig() (*rest.Config, error) { cfg, err := clientcmd.BuildConfigFromFlags(masterUrl, kubeconfigPath) return cfg, errors.Trace(err) } + +func buildClientsOrDie(cfg *rest.Config) (versioned.Interface, kubernetes.Interface) { + cfg.Timeout = 30 * time.Second + cli, err := versioned.NewForConfig(cfg) + if err != nil { + panic(err) + } + + kubeCli, err := kubernetes.NewForConfig(cfg) + if err != nil { + panic(err) + } + + return cli, kubeCli +}