Skip to content

Commit

Permalink
E2E test for kfctl should deploy kubeflow with basic auth (kubeflow#2705
Browse files Browse the repository at this point in the history
)

* The E2E test attempts to call delete but delete isn't working.

  * kfctl delete fails because there is no kubeconfig file created.
  * The delete step is marked as an expected failure.

Improve logging in kfctl with go binary
   * Log the name and status of GCP DM operations
   * Print out DM creation errors so users see things like quota issues.

Fix kubeflow#2706 - The coordinator commands should only invoke the commands apply/generate/delete for the resources specified.
  * There was a bug in the switch statements and we were always calling
    generate/apply/delete for platform & k8s and not respecting the parameter

Attempt to fix kubeflow#2706

  * We want to eliminate the need to talk to a K8s server when calling ks init
  * Hardcode the server address to 127.0.0.1
  * Hardcode K8s version to 1.11.7
  * The communication with the K8s server was coming because we were trying
    to talk to the K8s master to get the K8s version but we don't need to
    do that if we specify it.

* Add filename log hook to kfctl so that we can emit the filename and line
  number where errors occur.

* On init we Don't need to call GetConfig which tries to read the kubeconfig file on generate.

Add retries to generate and apply because running under argo kfctl seems
to randomly exit.
  • Loading branch information
jlewi authored and k8s-ci-robot committed Mar 16, 2019
1 parent d860215 commit e4c6f89
Show file tree
Hide file tree
Showing 12 changed files with 305 additions and 43 deletions.
9 changes: 9 additions & 0 deletions bootstrap/cmd/kfctl/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,17 @@ package main

import (
"github.com/kubeflow/kubeflow/bootstrap/cmd/kfctl/cmd"
"github.com/onrik/logrus/filename"
log "github.com/sirupsen/logrus"
)

func init() {
// Add filename as one of the fields of the structured log message.
filenameHook := filename.NewHook()
filenameHook.Field = "filename"
log.AddHook(filenameHook)
}

func main() {
cmd.Execute()
}
4 changes: 4 additions & 0 deletions bootstrap/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ require (
github.com/Masterminds/semver v1.4.2 // indirect
github.com/Masterminds/sprig v2.18.0+incompatible // indirect
github.com/NYTimes/gziphandler v1.0.1 // indirect
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
github.com/blang/semver v3.5.1+incompatible // indirect
github.com/cenkalti/backoff v2.1.1+incompatible
github.com/coreos/etcd v3.3.11+incompatible // indirect
Expand Down Expand Up @@ -57,6 +59,7 @@ require (
github.com/opencontainers/go-digest v1.0.0-rc1 // indirect
github.com/pkg/errors v0.8.1 // indirect
github.com/prometheus/client_golang v0.9.2
github.com/prometheus/common v0.0.0-20181126121408-4724e9255275
github.com/russross/blackfriday v0.0.0-00010101000000-000000000000 // indirect
github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c // indirect
github.com/sirupsen/logrus v1.3.0
Expand All @@ -71,6 +74,7 @@ require (
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c // indirect
google.golang.org/api v0.1.0
google.golang.org/genproto v0.0.0-20190111180523-db91494dd46c
gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
gopkg.in/resty.v1 v1.11.0
gopkg.in/square/go-jose.v2 v2.3.0 // indirect
Expand Down
84 changes: 84 additions & 0 deletions bootstrap/go.sum

Large diffs are not rendered by default.

73 changes: 55 additions & 18 deletions bootstrap/pkg/client/coordinator/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,12 +319,7 @@ type coordinator struct {
}

func (kfapp *coordinator) Apply(resources kftypes.ResourceEnum, options map[string]interface{}) error {
switch resources {
case kftypes.K8S:
fallthrough
case kftypes.PLATFORM:
fallthrough
case kftypes.ALL:
platform := func() error {
if kfapp.Client.Spec.Platform != "" {
platform := kfapp.Platforms[kfapp.Client.Spec.Platform]
if platform != nil {
Expand All @@ -337,23 +332,35 @@ func (kfapp *coordinator) Apply(resources kftypes.ResourceEnum, options map[stri
return fmt.Errorf("%v not in Platforms", kfapp.Client.Spec.Platform)
}
}
return nil
}

k8s := func() error {
for packageManagerName, packageManager := range kfapp.PackageManagers {
packageManagerErr := packageManager.Apply(kftypes.K8S, options)
if packageManagerErr != nil {
return fmt.Errorf("kfApp Apply failed for %v: %v", packageManagerName, packageManagerErr)
}
}
return nil
}

switch resources {
case kftypes.ALL:
if err := platform(); err != nil {
return err
}
return k8s()
case kftypes.PLATFORM:
return platform()
case kftypes.K8S:
return k8s()
}
return nil
}

func (kfapp *coordinator) Delete(resources kftypes.ResourceEnum, options map[string]interface{}) error {
switch resources {
case kftypes.K8S:
fallthrough
case kftypes.PLATFORM:
fallthrough
case kftypes.ALL:
platform := func() error {
if kfapp.Client.Spec.Platform != "" {
platform := kfapp.Platforms[kfapp.Client.Spec.Platform]
if platform != nil {
Expand All @@ -366,23 +373,36 @@ func (kfapp *coordinator) Delete(resources kftypes.ResourceEnum, options map[str
return fmt.Errorf("%v not in Platforms", kfapp.Client.Spec.Platform)
}
}
return nil
}

k8s := func() error {
for packageManagerName, packageManager := range kfapp.PackageManagers {
packageManagerErr := packageManager.Delete(kftypes.K8S, options)
if packageManagerErr != nil {
return fmt.Errorf("kfApp Delete failed for %v: %v", packageManagerName, packageManagerErr)
}
}
return nil
}

switch resources {
case kftypes.ALL:
if err := platform(); err != nil {
return err
}
return k8s()
case kftypes.PLATFORM:
return platform()
case kftypes.K8S:
return k8s()
}
return nil
}

func (kfapp *coordinator) Generate(resources kftypes.ResourceEnum, options map[string]interface{}) error {
switch resources {
case kftypes.K8S:
fallthrough
case kftypes.PLATFORM:
fallthrough
case kftypes.ALL:

platform := func() error {
if kfapp.Client.Spec.Platform != "" {
platform := kfapp.Platforms[kfapp.Client.Spec.Platform]
if platform != nil {
Expand All @@ -395,12 +415,29 @@ func (kfapp *coordinator) Generate(resources kftypes.ResourceEnum, options map[s
return fmt.Errorf("%v not in Platforms", kfapp.Client.Spec.Platform)
}
}
return nil
}

k8s := func() error {
for packageManagerName, packageManager := range kfapp.PackageManagers {
packageManagerErr := packageManager.Generate(kftypes.K8S, options)
if packageManagerErr != nil {
return fmt.Errorf("coordinator Generate failed for %v: %v", packageManagerName, packageManagerErr)
}
}
return nil
}

switch resources {
case kftypes.ALL:
if err := platform(); err != nil {
return err
}
return k8s()
case kftypes.PLATFORM:
return platform()
case kftypes.K8S:
return k8s()
}
return nil
}
Expand Down
13 changes: 10 additions & 3 deletions bootstrap/pkg/client/gcp/gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,12 @@ func blockingWait(project string, opName string, deploymentmanagerService *deplo
name := "" + opName
return backoff.Retry(func() error {
op, err := deploymentmanagerService.Operations.Get(p, name).Context(ctx).Do()

if op.Error != nil {
for _, e := range op.Error.Errors {
log.Errorf("Deployment error: %+v", e)
}
}
if op.Status == "DONE" {
if op.HttpErrorStatusCode > 0 {
return backoff.Permanent(fmt.Errorf("Deployment error(%v): %v",
Expand All @@ -322,9 +328,9 @@ func blockingWait(project string, opName string, deploymentmanagerService *deplo
} else if err != nil {
return backoff.Permanent(fmt.Errorf("Deployment error: %v", err))
}
log.Warnf("Deployment service is not ready: %v", op.Status)
log.Warnf("Deployment operation name: %v status: %v", op.Name, op.Status)
name = op.Name
return fmt.Errorf("Deployment is not ready: %v", op.Status)
return fmt.Errorf("Deployment operation did not succeed; name: %v status: %v", op.Name, op.Status)
}, backoff.NewExponentialBackOff())
}

Expand Down Expand Up @@ -354,13 +360,14 @@ func (gcp *Gcp) updateDeployment(deployment string, yamlfile string) error {
resp, err := deploymentmanagerService.Deployments.Get(project, deployment).Context(ctx).Do()
if err == nil {
dp.Fingerprint = resp.Fingerprint
log.Infof("Updating deployment %v", deployment)
op, updateErr := deploymentmanagerService.Deployments.Update(project, deployment, dp).Context(ctx).Do()
if updateErr != nil {
return fmt.Errorf("Update deployment error: %v", updateErr)
}
return blockingWait(project, op.Name, deploymentmanagerService, ctx)
} else {
log.Infof("Get deployment error, creating: %v", err)
log.Infof("Creating deployment %v", deployment)
op, insertErr := deploymentmanagerService.Deployments.Insert(project, dp).Context(ctx).Do()
if insertErr != nil {
return fmt.Errorf("Insert deployment error: %v", insertErr)
Expand Down
13 changes: 6 additions & 7 deletions bootstrap/pkg/client/ksonnet/ksonnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,7 @@ func setNameVal(entries []configtypes.NameValue, name string, val string) {
func (ksApp *ksApp) Generate(resources kftypes.ResourceEnum, options map[string]interface{}) error {
log.Infof("Ksonnet.Generate Name %v AppDir %v Platform %v", ksApp.KsApp.Name,
ksApp.KsApp.Spec.AppDir, ksApp.KsApp.Spec.Platform)
config := kftypes.GetConfig()
initErr := ksApp.initKs(config)
initErr := ksApp.initKs()
if initErr != nil {
return fmt.Errorf("couldn't initialize KfApi: %v", initErr)
}
Expand All @@ -409,8 +408,6 @@ func (ksApp *ksApp) Generate(resources kftypes.ResourceEnum, options map[string]
setNameVal(ksApp.KsApp.Spec.ComponentParams["application"], "components",
"["+strings.Join(components, " ,")+"]")

log.Infof("Configs for generation: %+v", config)

ksRegistry := kstypes.DefaultRegistry
ksRegistry.Version = ksApp.KsApp.Spec.Version
ksRegistry.RegUri = ksApp.KsApp.Spec.Repo
Expand Down Expand Up @@ -484,16 +481,18 @@ func (ksApp *ksApp) Init(resources kftypes.ResourceEnum, options map[string]inte
return nil
}

func (ksApp *ksApp) initKs(config *rest.Config) error {
func (ksApp *ksApp) initKs() error {
newRoot := path.Join(ksApp.KsApp.Spec.AppDir, ksApp.KsName)
ksApp.KsEnvName = kstypes.KsEnvName
k8sSpec := kftypes.GetServerVersion(kftypes.GetClientset(config))
// We hard code the K8s spec because we won't have a cluster to talk to when calling init.
k8sSpec := "version:v1.11.7"
options := map[string]interface{}{
actions.OptionFs: afero.NewOsFs(),
actions.OptionName: ksApp.KsName,
actions.OptionEnvName: ksApp.KsEnvName,
actions.OptionNewRoot: newRoot,
actions.OptionServer: config.Host,
// Using local host appears to fool ksonnet on init. We will add a new environment later.
actions.OptionServer: "127.0.0.1",
actions.OptionSpecFlag: k8sSpec,
actions.OptionNamespace: ksApp.KsApp.Namespace,
actions.OptionSkipDefaultRegistries: true,
Expand Down
2 changes: 1 addition & 1 deletion bootstrap/pkg/utils/iamutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func Test(t *testing.T) {
// Arguments for GetUpdatedPolicy function.
currentPolicy *cloudresourcemanager.Policy
// service account policy pending change
saPolicy *cloudresourcemanager.Policy
saPolicy *cloudresourcemanager.Policy

// Expected output policy
expectedPolicy *cloudresourcemanager.Policy
Expand Down
26 changes: 26 additions & 0 deletions testing/kfctl/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest

def pytest_addoption(parser):
parser.addoption(
"--app_path", action="store", default="",
help="Path where the KF application should be stored")

parser.addoption(
"--kfctl_path", action="store", default="",
help="Path to kfctl.")

parser.addoption(
"--project", action="store", default="kubeflow-ci-deployment",
help="GCP project to deploy Kubeflow to")

@pytest.fixture
def app_path(request):
return request.config.getoption("--app_path")

@pytest.fixture
def kfctl_path(request):
return request.config.getoption("--kfctl_path")

@pytest.fixture
def project(request):
return request.config.getoption("--project")
43 changes: 43 additions & 0 deletions testing/kfctl/kfctl_delete_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Run kfctl delete as a pytest.
We use this in order to generate a junit_xml file.
"""
import datetime
import logging
import os
import subprocess
import tempfile
import uuid
from retrying import retry

import pytest

from kubeflow.testing import util

# TODO(): Need to make delete work with a KUBECONFIG file.
@pytest.mark.xfail
def test_kfctl_delete(kfctl_path, app_path, project):
if not kfctl_path:
raise ValueError("kfctl_path is required")

if not app_path:
raise ValueError("app_path is required")

logging.info("Using kfctl path %s", kfctl_path)
logging.info("Using app path %s", app_path)

util.run([kfctl_path, "delete", "-V", "all"], cwd=app_path)

# Delete the storage
app_name = os.path.basename(app_path)
util.run(["gcloud", "deployment-manager", "--project=" + project,
"deployments", "delete", app_name + "-storage", "--quiet"])

if __name__ == "__main__":
logging.basicConfig(level=logging.INFO,
format=('%(levelname)s|%(asctime)s'
'|%(pathname)s|%(lineno)d| %(message)s'),
datefmt='%Y-%m-%dT%H:%M:%S',
)
logging.getLogger().setLevel(logging.INFO)
pytest.main()
40 changes: 34 additions & 6 deletions testing/kfctl/kfctl_go_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import logging
import os
import subprocess
import tempfile
import uuid
from retrying import retry
Expand All @@ -9,17 +10,44 @@

from kubeflow.testing import util

# We need to use retry builds because when building in the test cluster
# we see intermittent failures pulling dependencies
@retry(stop_max_attempt_number=7)
def build(build_dir):
util.run(["make", "build-kfctl"], cwd=build_dir)
def run_with_retries(*args, **kwargs):
util.run(*args, **kwargs)

def test_build_kfctl_go():
def test_build_kfctl_go(app_path, project):
if not app_path:
logging.info("--app_path not specified")
stamp = datetime.datetime.now().strftime("%H%M")
app_path = os.path.join(tempfile.gettempdir(),
"kfctl-{0}-{1}".format(stamp,
uuid.uuid4().hex[0:4]))
logging.info("Using app path %s", app_path)
this_dir = os.path.dirname(__file__)
root = os.path.abspath(os.path.join(this_dir, "..", ".."))
build_dir = os.path.join(root, "bootstrap")
build(build_dir)

# We need to use retry builds because when building in the test cluster
# we see intermittent failures pulling dependencies
run_with_retries(["make", "build-kfctl"], cwd=build_dir)
kfctl_path = os.path.join(build_dir, "bin", "kfctl")

# We don't want the password to show up in the logs because the logs
# are public. So we use subprocess and not util.run
subprocess.check_call([kfctl_path, "init", app_path, "-V", "--platform=gcp",
"--use_basic_auth", "--skip-init-gcp-project",
"--project=" + project,
"--basic_auth_username=kf-test-user",
"--basic_auth_password=" + uuid.uuid4().hex])

# TODO(jlewi): We need to specify a valid email otherwise we get an error
# when trying to apply the IAM policy.
run_with_retries([kfctl_path, "generate", "-V", "all",
"[email protected]"],
cwd=app_path)

# We need to use retries because if we don't we see random failures
# where kfctl just appears to die.
run_with_retries([kfctl_path, "apply", "-V", "all"], cwd=app_path)

if __name__ == "__main__":
logging.basicConfig(level=logging.INFO,
Expand Down
Loading

0 comments on commit e4c6f89

Please sign in to comment.