From e1769644da257f0f1d6e2d854bd0755f3bdc1922 Mon Sep 17 00:00:00 2001
From: nikita15p
Date: Tue, 18 Jun 2019 10:43:39 +0530
Subject: [PATCH 01/26] Lint fix for e2e package except dot import lint failures

---
 test/e2e/cli_util.go | 5 +++++
 test/e2e/mpi.go      | 1 +
 test/e2e/util.go     | 5 +++--
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/test/e2e/cli_util.go b/test/e2e/cli_util.go
index 341ab6de93..7e1dd0a95c 100644
--- a/test/e2e/cli_util.go
+++ b/test/e2e/cli_util.go
@@ -24,6 +24,7 @@ import (
 	. "github.com/onsi/gomega"
 )
 
+//ResumeJob resumes the job in the given namespace
 func ResumeJob(name string, namespace string) string {
 	command := []string{"job", "resume"}
 	Expect(name).NotTo(Equal(""), "Job name should not be empty in Resume job command")
@@ -34,6 +35,7 @@ func ResumeJob(name string, namespace string) string {
 	return RunCliCommand(command)
 }
 
+//SuspendJob suspends the job in the given namespace
 func SuspendJob(name string, namespace string) string {
 	command := []string{"job", "suspend"}
 	Expect(name).NotTo(Equal(""), "Job name should not be empty in Suspend job command")
@@ -44,6 +46,7 @@ func SuspendJob(name string, namespace string) string {
 	return RunCliCommand(command)
 }
 
+//ListJobs lists all the jobs in the given namespace
 func ListJobs(namespace string) string {
 	command := []string{"job", "list"}
 	if namespace != "" {
@@ -52,6 +55,7 @@ func ListJobs(namespace string) string {
 	return RunCliCommand(command)
 }
 
+//DeleteJob deletes the job in the given namespace
 func DeleteJob(name string, namespace string) string {
 	command := []string{"job", "delete"}
 	Expect(name).NotTo(Equal(""), "Job name should not be empty in delete job command")
@@ -62,6 +66,7 @@ func DeleteJob(name string, namespace string) string {
 	return RunCliCommand(command)
 }
 
+//RunCliCommand runs the volcano command
 func RunCliCommand(command []string) string {
 	if masterURL() != "" {
 		command = append(command, "--master", masterURL())
diff --git a/test/e2e/mpi.go b/test/e2e/mpi.go
index 268123498a..0fc57b9953 100644
--- a/test/e2e/mpi.go
+++ b/test/e2e/mpi.go
@@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
+
 package e2e
 
 import (
diff --git a/test/e2e/util.go b/test/e2e/util.go
index 03af6c8beb..f2b4e8a8ce 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -92,6 +92,7 @@ func kubeconfigPath(home string) string {
 	return filepath.Join(home, ".kube", "config") // default kubeconfig path is $HOME/.kube/config
 }
 
+//VolcanoCliBinary returns the path to the volcano CLI binary
 func VolcanoCliBinary() string {
 	if bin := os.Getenv("VK_BIN"); bin != "" {
 		return filepath.Join(bin, "vkctl")
@@ -974,6 +975,7 @@ func preparePatchBytesforNode(nodeName string, oldNode *v1.Node, newNode *v1.Nod
 	return patchBytes, nil
 }
 
+// IsNodeReady returns the ready status of the node
 func IsNodeReady(node *v1.Node) bool {
 	for _, c := range node.Status.Conditions {
 		if c.Type == v1.NodeReady {
@@ -987,9 +989,8 @@ func waitClusterReady(ctx *context) error {
 	return wait.Poll(100*time.Millisecond, oneMinute, func() (bool, error) {
 		if readyNodeAmount(ctx) >= 1 {
 			return true, nil
-		} else {
-			return false, nil
 		}
+		return false, nil
 	})
 }
 

From 33655a10e61ab07d062e0bd3cdcd4226e14b75ef Mon Sep 17 00:00:00 2001
From: Thandayuthapani
Date: Tue, 18 Jun 2019 15:24:11 +0530
Subject: [PATCH 02/26] UT cases for Job Package

---
 .../job/job_controller_actions_test.go | 543 ++++++++++++++++++
 1 file changed, 543 insertions(+)
 create mode 100644 pkg/controllers/job/job_controller_actions_test.go

diff --git a/pkg/controllers/job/job_controller_actions_test.go b/pkg/controllers/job/job_controller_actions_test.go
new file mode 100644
index 0000000000..042c6d3274
--- /dev/null
+++ b/pkg/controllers/job/job_controller_actions_test.go
@@ -0,0 +1,543 @@
+/*
+Copyright 2019 The Volcano Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package job + +import ( + "fmt" + kbv1aplha1 "github.com/kubernetes-sigs/kube-batch/pkg/apis/scheduling/v1alpha1" + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "testing" + "volcano.sh/volcano/pkg/apis/batch/v1alpha1" + "volcano.sh/volcano/pkg/controllers/apis" + "volcano.sh/volcano/pkg/controllers/job/state" +) + +func TestKillJobFunc(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + PodGroup *kbv1aplha1.PodGroup + PodRetainPhase state.PhaseMap + UpdateStatus state.UpdateStatusFn + JobInfo *apis.JobInfo + Services []v1.Service + ConfigMaps []v1.ConfigMap + Pods map[string]*v1.Pod + Plugins []string + ExpextVal error + }{ + { + Name: "KillJob Sucess Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + PodGroup: &kbv1aplha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + PodRetainPhase: state.PodRetainPhaseNone, + UpdateStatus: nil, + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Services: []v1.Service{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + }, + ConfigMaps: []v1.ConfigMap{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1-ssh", + Namespace: namespace, + }, + }, + }, + Pods: map[string]*v1.Pod{ + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + Plugins: []string{"svc", "ssh", "env"}, + ExpextVal: nil, + }, + } + + for i, testcase := range testcases { + + fakeController := newFakeController() + jobPlugins := make(map[string][]string) + + for _, service := range testcase.Services { + _, err := fakeController.kubeClients.CoreV1().Services(namespace).Create(&service) + if err != nil { + t.Error("Error While Creating Service") + } + } + + for _, configMap := range testcase.ConfigMaps { + _, err := fakeController.kubeClients.CoreV1().ConfigMaps(namespace).Create(&configMap) + if err != nil { + t.Error("Error While Creating ConfigMaps") + } + } + + for _, pod := range testcase.Pods { + _, err := fakeController.kubeClients.CoreV1().Pods(namespace).Create(pod) + if err != nil { + t.Error("Error While Creating ConfigMaps") + } + } + + _, err := fakeController.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.Job) + if err != nil { + t.Error("Error While Creating Jobs") + } + err = fakeController.cache.Add(testcase.Job) + if err != nil { + t.Error("Error While Adding Job in cache") + } + + for _, plugin := range testcase.Plugins { + jobPlugins[plugin] = make([]string, 0) + } + + testcase.JobInfo.Job = testcase.Job + testcase.JobInfo.Job.Spec.Plugins = jobPlugins + + err = fakeController.killJob(testcase.JobInfo, testcase.PodRetainPhase, testcase.UpdateStatus) + if err != nil { + t.Errorf("Case %d (%s): expected: No Error, but got error %s", i, testcase.Name, err.Error()) + } + + for _, plugin := range testcase.Plugins { + + if plugin == "svc" { + _, err = fakeController.kubeClients.CoreV1().Services(namespace).Get(testcase.Job.Name, metav1.GetOptions{}) + if err == nil { + t.Errorf("Case %d (%s): expected: Service to be deleted, but not deleted because of error %s", i, testcase.Name, err.Error()) + } + } + + if plugin == "ssh" { + _, err := 
fakeController.kubeClients.CoreV1().ConfigMaps(namespace).Get(fmt.Sprint(testcase.Job.Name, "-ssh"), metav1.GetOptions{}) + if err == nil { + t.Errorf("Case %d (%s): expected: ConfigMap to be deleted, but not deleted because of error %s", i, testcase.Name, err.Error()) + } + } + } + } +} + +func TestCreateJobFunc(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + PodGroup *kbv1aplha1.PodGroup + UpdateStatus state.UpdateStatusFn + JobInfo *apis.JobInfo + Plugins []string + ExpextVal error + }{ + { + Name: "CreateJob Sucess Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + PodGroup: &kbv1aplha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + UpdateStatus: nil, + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + }, + Plugins: []string{"svc", "ssh", "env"}, + ExpextVal: nil, + }, + } + + for i, testcase := range testcases { + + fakeController := newFakeController() + jobPlugins := make(map[string][]string) + + for _, plugin := range testcase.Plugins { + jobPlugins[plugin] = make([]string, 0) + } + testcase.JobInfo.Job = testcase.Job + testcase.JobInfo.Job.Spec.Plugins = jobPlugins + + _, err := fakeController.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.Job) + if err != nil { + t.Errorf("Case %d (%s): expected: No Error, but got error %s", i, testcase.Name, err.Error()) + } + + err = fakeController.cache.Add(testcase.Job) + if err != nil { + t.Error("Error While Adding Job in cache") + } + + err = fakeController.createJob(testcase.JobInfo, testcase.UpdateStatus) + if err != nil { + t.Errorf("Case %d (%s): expected: No Error, but got error %s", i, testcase.Name, err.Error()) + } + + job, err := fakeController.vkClients.BatchV1alpha1().Jobs(namespace).Get(testcase.Job.Name, metav1.GetOptions{}) + if err != nil { + t.Errorf("Case %d (%s): expected: No Error, but got error %s", i, testcase.Name, err.Error()) + } + for _, plugin := range testcase.Plugins { + + if plugin == "svc" { + _, err = fakeController.kubeClients.CoreV1().Services(namespace).Get(testcase.Job.Name, metav1.GetOptions{}) + if err != nil { + t.Errorf("Case %d (%s): expected: Service to be created, but not created because of error %s", i, testcase.Name, err.Error()) + } + + _, err = fakeController.kubeClients.CoreV1().ConfigMaps(namespace).Get(fmt.Sprint(testcase.Job.Name, "-svc"), metav1.GetOptions{}) + if err != nil { + t.Errorf("Case %d (%s): expected: Service to be created, but not created because of error %s", i, testcase.Name, err.Error()) + } + + exist := job.Status.ControlledResources["plugin-svc"] + if exist == "" { + t.Errorf("Case %d (%s): expected: ControlledResources should be added, but not got added", i, testcase.Name) + } + } + + if plugin == "ssh" { + _, err := fakeController.kubeClients.CoreV1().ConfigMaps(namespace).Get(fmt.Sprint(testcase.Job.Name, "-ssh"), metav1.GetOptions{}) + if err != nil { + t.Errorf("Case %d (%s): expected: ConfigMap to be created, but not created because of error %s", i, testcase.Name, err.Error()) + } + exist := job.Status.ControlledResources["plugin-ssh"] + if exist == "" { + t.Errorf("Case %d (%s): expected: ControlledResources should be added, but not got added", i, testcase.Name) + } + } + if plugin == "env" { + exist := job.Status.ControlledResources["plugin-env"] + if exist == "" { + t.Errorf("Case %d (%s): expected: ControlledResources should be added, but not got added", i, 
testcase.Name) + } + } + } + } +} + +func TestSyncJobFunc(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + PodGroup *kbv1aplha1.PodGroup + PodRetainPhase state.PhaseMap + UpdateStatus state.UpdateStatusFn + JobInfo *apis.JobInfo + Pods map[string]*v1.Pod + Plugins []string + TotalNumPods int + ExpextVal error + }{ + { + Name: "SyncJob Sucess Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task1", + Replicas: 6, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pods", + Namespace: namespace, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "Containers", + }, + }, + }, + }, + }, + }, + }, + }, + PodGroup: &kbv1aplha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + PodRetainPhase: state.PodRetainPhaseNone, + UpdateStatus: nil, + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "job1-task1-0": buildPod(namespace, "job1-task1-0", v1.PodRunning, nil), + "job1-task1-1": buildPod(namespace, "job1-task1-1", v1.PodRunning, nil), + }, + }, + }, + Pods: map[string]*v1.Pod{ + "job1-task1-0": buildPod(namespace, "job1-task1-0", v1.PodRunning, nil), + "job1-task1-1": buildPod(namespace, "job1-task1-1", v1.PodRunning, nil), + }, + TotalNumPods: 6, + Plugins: []string{"svc", "ssh", "env"}, + ExpextVal: nil, + }, + } + for i, testcase := range testcases { + + fakeController := newFakeController() + jobPlugins := make(map[string][]string) + + for _, plugin := range testcase.Plugins { + jobPlugins[plugin] = make([]string, 0) + } + testcase.JobInfo.Job = testcase.Job + testcase.JobInfo.Job.Spec.Plugins = jobPlugins + + for _, pod := range testcase.Pods { + _, err := fakeController.kubeClients.CoreV1().Pods(namespace).Create(pod) + if err != nil { + t.Error("Error While Creating pods") + } + } + + _, err := fakeController.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.Job) + if err != nil { + t.Errorf("Expected no Error while creating job, but got error: %s", err) + } + + err = fakeController.cache.Add(testcase.Job) + if err != nil { + t.Error("Error While Adding Job in cache") + } + + err = fakeController.syncJob(testcase.JobInfo, nil) + if err != testcase.ExpextVal { + t.Errorf("Expected no error while syncing job, but got error: %s", err) + } + + podList, err := fakeController.kubeClients.CoreV1().Pods(namespace).List(metav1.ListOptions{}) + if err != nil { + t.Errorf("Expected no error while listing pods, but got error %s in case %d", err, i) + } + if testcase.TotalNumPods != len(podList.Items) { + t.Errorf("Expected Total number of pods to be same as podlist count: Expected: %d, Got: %d in case: %d", testcase.TotalNumPods, len(podList.Items), i) + } + } +} + +func TestCreateJobIOIfNotExistFunc(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + ExpextVal error + }{ + { + Name: "Create Job IO sucess case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + Volumes: []v1alpha1.VolumeSpec{ + { + VolumeClaimName: "pvc1", + }, + }, + }, + }, + ExpextVal: nil, + }, + } + + for i, testcase := range testcases { + fakeController := newFakeController() + + err, job := fakeController.createJobIOIfNotExist(testcase.Job) + if err != 
testcase.ExpextVal { + t.Errorf("Expected Return value to be : %s, but got: %s in testcase %d", testcase.ExpextVal, err, i) + } + + if len(job.Spec.Volumes) == 0 { + t.Errorf("Expected number of volumes to be greater than 0 but got: %d in case: %d", len(job.Spec.Volumes), i) + } + } +} + +func TestCreatePVCFunc(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + VolumeClaim *v1.PersistentVolumeClaimSpec + ExpextVal error + }{ + { + Name: "CreatePVC Sucess Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + VolumeClaim: &v1.PersistentVolumeClaimSpec{ + VolumeName: "vol1", + }, + ExpextVal: nil, + }, + } + + for _, testcase := range testcases { + fakeController := newFakeController() + + err := fakeController.createPVC(testcase.Job, "pvc1", testcase.VolumeClaim) + if err != testcase.ExpextVal { + t.Errorf("Expected return value to be equal to expected: %s, but got: %s", testcase.ExpextVal, err) + } + _, err = fakeController.kubeClients.CoreV1().PersistentVolumeClaims(namespace).Get("pvc1", metav1.GetOptions{}) + if err != nil { + t.Error("Expected PVC to get created, but not created") + } + } +} + +func TestCreatePodGroupIfNotExistFunc(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + ExpextVal error + }{ + { + Name: "CreatePodGroup Sucess Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: "job1", + }, + }, + ExpextVal: nil, + }, + } + + for _, testcase := range testcases { + fakeController := newFakeController() + + err := fakeController.createPodGroupIfNotExist(testcase.Job) + if err != testcase.ExpextVal { + t.Errorf("Expected return value to be equal to expected: %s, but got: %s", testcase.ExpextVal, err) + } + + _, err = fakeController.kbClients.SchedulingV1alpha1().PodGroups(namespace).Get(testcase.Job.Name, metav1.GetOptions{}) + if err != nil { + t.Error("Expected PodGroup to get created, but not created") + } + } +} + +func TestDeleteJobPod(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + Pods map[string]*v1.Pod + DeletePod *v1.Pod + ExpextVal error + }{ + { + Name: "DeleteJobPod sucess case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + Pods: map[string]*v1.Pod{ + "job1-task1-0": buildPod(namespace, "job1-task1-0", v1.PodRunning, nil), + "job1-task1-1": buildPod(namespace, "job1-task1-1", v1.PodRunning, nil), + }, + DeletePod: buildPod(namespace, "job1-task1-0", v1.PodRunning, nil), + ExpextVal: nil, + }, + } + + for _, testcase := range testcases { + fakeController := newFakeController() + + for _, pod := range testcase.Pods { + _, err := fakeController.kubeClients.CoreV1().Pods(namespace).Create(pod) + if err != nil { + t.Error("Expected error not to occur") + } + } + + err := fakeController.deleteJobPod(testcase.Job.Name, testcase.DeletePod) + if err != testcase.ExpextVal { + t.Errorf("Expected return value to be equal to expected: %s, but got: %s", testcase.ExpextVal, err) + } + + _, err = fakeController.kubeClients.CoreV1().Pods(namespace).Get("job1-task1-0", metav1.GetOptions{}) + if err == nil { + t.Error("Expected Pod to be deleted but not deleted") + } + } +} From 10b906908a0165a2c22052e1ffe5067532caf0df Mon Sep 17 00:00:00 2001 From: nikita15p Date: Tue, 18 Jun 2019 16:41:51 +0530 Subject: [PATCH 03/26] Lint fix for admission package --- 
 hack/.golint_failures                 |  1 -
 pkg/admission/admission_controller.go | 16 +++++++++++++---
 pkg/admission/admit_job.go            |  2 +-
 pkg/admission/admit_job_test.go       |  4 ++--
 pkg/admission/mutate_job.go           |  3 ++-
 5 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/hack/.golint_failures b/hack/.golint_failures
index 7ea2ff67b7..dd46196f93 100644
--- a/hack/.golint_failures
+++ b/hack/.golint_failures
@@ -1,4 +1,3 @@
-volcano.sh/volcano/pkg/admission
 volcano.sh/volcano/pkg/controllers/apis
 volcano.sh/volcano/pkg/controllers/cache
 volcano.sh/volcano/pkg/controllers/job
diff --git a/pkg/admission/admission_controller.go b/pkg/admission/admission_controller.go
index 65c7c4aa40..2bcc7c3c6b 100644
--- a/pkg/admission/admission_controller.go
+++ b/pkg/admission/admission_controller.go
@@ -34,15 +34,23 @@ import (
 )
 
 const (
-	AdmitJobPath = "/jobs"
+	//AdmitJobPath is the pattern for the jobs admission
+	AdmitJobPath = "/jobs"
+	//MutateJobPath is the pattern for the mutating jobs
 	MutateJobPath = "/mutating-jobs"
-	PVCInputName = "volcano.sh/job-input"
+	//PVCInputName stores the input name of PVC
+	PVCInputName = "volcano.sh/job-input"
+	//PVCOutputName stores the output name of PVC
 	PVCOutputName = "volcano.sh/job-output"
 )
 
+//AdmitFunc is the type of functions that handle an AdmissionReview and return an AdmissionResponse
 type AdmitFunc func(v1beta1.AdmissionReview) *v1beta1.AdmissionResponse
 
 var scheme = runtime.NewScheme()
+
+//Codecs is for retrieving serializers for the supported wire formats
+//and conversion wrappers to define preferred internal and external versions.
 var Codecs = serializer.NewCodecFactory(scheme)
 
 // policyEventMap defines all policy events and whether to allow external use
@@ -75,6 +83,7 @@ func addToScheme(scheme *runtime.Scheme) {
 	admissionregistrationv1beta1.AddToScheme(scheme)
 }
 
+//ToAdmissionResponse builds an AdmissionResponse from the given error
 func ToAdmissionResponse(err error) *v1beta1.AdmissionResponse {
 	glog.Error(err)
 	return &v1beta1.AdmissionResponse{
@@ -84,6 +93,7 @@ func ToAdmissionResponse(err error) *v1beta1.AdmissionResponse {
 	}
 }
 
+//DecodeJob decodes the job from the raw object using the deserializer
 func DecodeJob(object runtime.RawExtension, resource metav1.GroupVersionResource) (v1alpha1.Job, error) {
 	jobResource := metav1.GroupVersionResource{Group: v1alpha1.SchemeGroupVersion.Group, Version: v1alpha1.SchemeGroupVersion.Version, Resource: "jobs"}
 	raw := object.Raw
@@ -178,7 +188,7 @@ func getValidActions() []v1alpha1.Action {
 	return actions
 }
 
-// validate IO configuration
+// ValidateIO validates IO configuration
 func ValidateIO(volumes []v1alpha1.VolumeSpec) (string, bool) {
 	volumeMap := map[string]bool{}
 	for _, volume := range volumes {
diff --git a/pkg/admission/admit_job.go b/pkg/admission/admit_job.go
index 819cebfa95..6a011b72a3 100644
--- a/pkg/admission/admit_job.go
+++ b/pkg/admission/admit_job.go
@@ -39,7 +39,7 @@ import (
 //KubeBatchClientSet is kube-batch clientset
 var KubeBatchClientSet versioned.Interface
 
-// job admit.
+// AdmitJobs is to admit jobs and return response func AdmitJobs(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse { glog.V(3).Infof("admitting jobs -- %s", ar.Request.Operation) diff --git a/pkg/admission/admit_job_test.go b/pkg/admission/admit_job_test.go index 43f509c4bb..8b9a1f8aef 100644 --- a/pkg/admission/admit_job_test.go +++ b/pkg/admission/admit_job_test.go @@ -33,7 +33,7 @@ import ( func TestValidateExecution(t *testing.T) { namespace := "test" - var invTtl int32 = -1 + var invTTL int32 = -1 testCases := []struct { Name string @@ -281,7 +281,7 @@ func TestValidateExecution(t *testing.T) { }, }, }, - TTLSecondsAfterFinished: &invTtl, + TTLSecondsAfterFinished: &invTTL, }, }, reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, diff --git a/pkg/admission/mutate_job.go b/pkg/admission/mutate_job.go index 637761f133..e269c20fba 100644 --- a/pkg/admission/mutate_job.go +++ b/pkg/admission/mutate_job.go @@ -29,6 +29,7 @@ import ( ) const ( + //DefaultQueue constant stores the name of the queue as "default" DefaultQueue = "default" ) @@ -38,7 +39,7 @@ type patchOperation struct { Value interface{} `json:"value,omitempty"` } -// mutate job. +// MutateJobs mutate jobs func MutateJobs(ar v1beta1.AdmissionReview) *v1beta1.AdmissionResponse { glog.V(3).Infof("mutating jobs") From d63c817e46c20ae4967a1fff808fbcff37134dd3 Mon Sep 17 00:00:00 2001 From: Thandayuthapani Date: Tue, 18 Jun 2019 15:59:35 +0530 Subject: [PATCH 04/26] Update README --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6417c26465..e7579ef63b 100644 --- a/README.md +++ b/README.md @@ -53,8 +53,26 @@ the open source community. ## Quick Start Guide -The easiest way to deploy Volcano is to use the Helm chart. +The easiest way to deploy Volcano is to use the Helm chart. Volcano can be deployed by cloning code and also by adding helm repo. 
+## Using Volcano Helm Repo
+
+Add the Volcano Helm repo using the following command:
+
+```
+helm repo add volcano https://volcano-sh.github.io/charts
+```
+
+Install Volcano using the following command:
+
+```
+helm install volcano/volcano --namespace <namespace> --name <release-name>
+
+For example:
+helm install volcano/volcano --namespace volcano-trial --name volcano-trial
+```
+
+## Cloning Code
 ### Pre-requisites
 First of all, clone the repo to your local path:

From c17f58627b2b81159f57ec124aec13d5ee397a20 Mon Sep 17 00:00:00 2001
From: Thandayuthapani
Date: Tue, 18 Jun 2019 18:08:05 +0530
Subject: [PATCH 05/26] Fix review comments

---
 pkg/controllers/job/job_controller_actions_test.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pkg/controllers/job/job_controller_actions_test.go b/pkg/controllers/job/job_controller_actions_test.go
index 042c6d3274..714c43c14b 100644
--- a/pkg/controllers/job/job_controller_actions_test.go
+++ b/pkg/controllers/job/job_controller_actions_test.go
@@ -44,7 +44,7 @@ func TestKillJobFunc(t *testing.T) {
 		ExpextVal error
 	}{
 		{
-			Name: "KillJob Sucess Case",
+			Name: "KillJob success Case",
 			Job: &v1alpha1.Job{
 				ObjectMeta: metav1.ObjectMeta{
 					Name:      "job1",
@@ -173,7 +173,7 @@ func TestCreateJobFunc(t *testing.T) {
 		ExpextVal error
 	}{
 		{
-			Name: "CreateJob Sucess Case",
+			Name: "CreateJob success Case",
 			Job: &v1alpha1.Job{
 				ObjectMeta: metav1.ObjectMeta{
 					Name:      "job1",
@@ -281,7 +281,7 @@ func TestSyncJobFunc(t *testing.T) {
 		ExpextVal error
 	}{
 		{
-			Name: "SyncJob Sucess Case",
+			Name: "SyncJob success Case",
 			Job: &v1alpha1.Job{
 				ObjectMeta: metav1.ObjectMeta{
 					Name:      "job1",
@@ -388,7 +388,7 @@ func TestCreateJobIOIfNotExistFunc(t *testing.T) {
 		ExpextVal error
 	}{
 		{
-			Name: "Create Job IO sucess case",
+			Name: "Create Job IO success case",
 			Job: &v1alpha1.Job{
 				ObjectMeta: metav1.ObjectMeta{
 					Name:      "job1",
@@ -430,7 +430,7 @@ func TestCreatePVCFunc(t *testing.T) {
 		ExpextVal error
 	}{
 		{
-			Name: "CreatePVC Sucess Case",
+			Name: "CreatePVC success Case",
 			Job: &v1alpha1.Job{
 				ObjectMeta: metav1.ObjectMeta{
 					Name:      "job1",
@@ -467,7 +467,7 @@ func TestCreatePodGroupIfNotExistFunc(t *testing.T) {
 		ExpextVal error
 	}{
 		{
-			Name: "CreatePodGroup Sucess Case",
+			Name: "CreatePodGroup success Case",
 			Job: &v1alpha1.Job{
 				ObjectMeta: metav1.ObjectMeta{
 					Namespace: namespace,
@@ -504,7 +504,7 @@ func TestDeleteJobPod(t *testing.T) {
 		ExpextVal error
 	}{
 		{
-			Name: "DeleteJobPod sucess case",
+			Name: "DeleteJobPod success case",
 			Job: &v1alpha1.Job{
 				ObjectMeta: metav1.ObjectMeta{
 					Name:      "job1",

From c05f3a6709f4289bf418a5f64ef6bec8cd782b79 Mon Sep 17 00:00:00 2001
From: Rajadeepan D Ramesh
Date: Tue, 18 Jun 2019 20:07:27 +0530
Subject: [PATCH 06/26] Adding Events for Action

---
 pkg/apis/batch/v1alpha1/job.go        | 2 ++
 pkg/controllers/job/job_controller.go | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/pkg/apis/batch/v1alpha1/job.go b/pkg/apis/batch/v1alpha1/job.go
index e304ad195d..7ddf67aac0 100644
--- a/pkg/apis/batch/v1alpha1/job.go
+++ b/pkg/apis/batch/v1alpha1/job.go
@@ -112,6 +112,8 @@ const (
 	PVCError JobEvent = "PVCError"
 	// PodGroupError pod grp error event is generated if error happens during pod grp creation
 	PodGroupError JobEvent = "PodGroupError"
+	//ExecuteAction event is recorded when an action is issued for the job
+	ExecuteAction JobEvent = "ExecuteAction"
 )
 
 // Event represent the phase of Job, e.g. pod-failed.
diff --git a/pkg/controllers/job/job_controller.go b/pkg/controllers/job/job_controller.go index 8f6eb06dea..c82c875355 100644 --- a/pkg/controllers/job/job_controller.go +++ b/pkg/controllers/job/job_controller.go @@ -17,6 +17,8 @@ limitations under the License. package job import ( + "fmt" + "github.com/golang/glog" "k8s.io/api/core/v1" @@ -38,6 +40,7 @@ import ( kbinfo "github.com/kubernetes-sigs/kube-batch/pkg/client/informers/externalversions/scheduling/v1alpha1" kblister "github.com/kubernetes-sigs/kube-batch/pkg/client/listers/scheduling/v1alpha1" + vkbatchv1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1" vkver "volcano.sh/volcano/pkg/client/clientset/versioned" vkscheme "volcano.sh/volcano/pkg/client/clientset/versioned/scheme" vkinfoext "volcano.sh/volcano/pkg/client/informers/externalversions" @@ -239,6 +242,11 @@ func (cc *Controller) processNextReq() bool { glog.V(3).Infof("Execute <%v> on Job <%s/%s> in <%s> by <%T>.", action, req.Namespace, req.JobName, jobInfo.Job.Status.State.Phase, st) + if action != vkbatchv1.SyncJobAction { + cc.recordJobEvent(jobInfo.Job.Namespace, jobInfo.Job.Name, vkbatchv1.ExecuteAction, fmt.Sprintf( + "Start to execute action %s ", action)) + } + if err := st.Execute(action); err != nil { glog.Errorf("Failed to handle Job <%s/%s>: %v", jobInfo.Job.Namespace, jobInfo.Job.Name, err) From 85f27989bfecb967977ae0ed2f187eed98623f00 Mon Sep 17 00:00:00 2001 From: TommyLike Date: Wed, 19 Jun 2019 16:07:55 +0800 Subject: [PATCH 07/26] Cleanup codes --- .cid/volcano.yml | 30 - hack/dind-cluster-v1.12.sh | 2389 ----------------- pkg/admission/admission_controller.go | 2 - .../garbagecollector/garbagecollector.go | 2 +- pkg/controllers/job/job_controller_util.go | 12 - pkg/controllers/job/plugins/env/types.go | 3 - pkg/controllers/job/state/inqueue.go | 1 - 7 files changed, 1 insertion(+), 2438 deletions(-) delete mode 100644 .cid/volcano.yml delete mode 100755 hack/dind-cluster-v1.12.sh diff --git a/.cid/volcano.yml b/.cid/volcano.yml deleted file mode 100644 index 53f88ce37d..0000000000 --- a/.cid/volcano.yml +++ /dev/null @@ -1,30 +0,0 @@ -version: 1.0 -name: volcano -# 开发语言 -language: - - go - -# 构建工具 -dependencies: - base: - godk: 1.10.3 - helm: 2.6.1 - - -# 编译机类型 -machine: - standard: - euler: - - default - -variables: - TAG: 0.0.1 - -# 定义构建的命令行,通过定义的构建命令行可以构建出artifacts中定义的构建产物。 -scripts: -- set -ex; export GOPATH=`pwd`; find ./installer/dockerfile/ -maxdepth 2 -name Dockerfile -exec sed -i 's#FROM .*$#FROM registry.huawei.com/huawei-paas/euleros:2.2.5#' {} \; ; mkdir -p fuxi-prepare; mkdir -p fuxi-prepare; rsync -ac --exclude fuxi-prepare/ ./ fuxi-prepare; mkdir -p src/volcano.sh; mv fuxi-prepare/ src/volcano.sh/volcano; cd src/volcano.sh/volcano; make package -e TAG=${TAG} # 请修改成业务使用的构建脚本文件相对路径 - -# 定义构建产物,在编译完成后根据此配置项上传构建产物。 -artifacts: - cdk: - - _output/Volcano-package-.*.tgz \ No newline at end of file diff --git a/hack/dind-cluster-v1.12.sh b/hack/dind-cluster-v1.12.sh deleted file mode 100755 index 1003e6722f..0000000000 --- a/hack/dind-cluster-v1.12.sh +++ /dev/null @@ -1,2389 +0,0 @@ -#!/bin/bash -# Copyright 2018 Mirantis -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -if [ $(uname) = Darwin ]; then - readlinkf(){ perl -MCwd -e 'print Cwd::abs_path shift' "$1";} -else - readlinkf(){ readlink -f "$1"; } -fi -DIND_ROOT="$(cd $(dirname "$(readlinkf "${BASH_SOURCE}")"); pwd)" - -RUN_ON_BTRFS_ANYWAY="${RUN_ON_BTRFS_ANYWAY:-}" -if [[ ! ${RUN_ON_BTRFS_ANYWAY} ]] && docker info| grep -q '^Storage Driver: btrfs'; then - echo "ERROR: Docker is using btrfs storage driver which is unsupported by kubeadm-dind-cluster" >&2 - echo "Please refer to the documentation for more info." >&2 - echo "Set RUN_ON_BTRFS_ANYWAY to non-empty string to continue anyway." >&2 - exit 1 -fi - -# In case of linuxkit / moby linux, -v will not work so we can't -# mount /lib/modules and /boot. Also we'll be using localhost -# to access the apiserver. -using_linuxkit= -if ! docker info|grep -s '^Operating System: .*Docker for Windows' > /dev/null 2>&1 ; then - if docker info|grep -s '^Kernel Version: .*-moby$' >/dev/null 2>&1 || - docker info|grep -s '^Kernel Version: .*-linuxkit' > /dev/null 2>&1 ; then - using_linuxkit=1 - fi -fi - -# Determine when using Linux and docker daemon running locally -using_local_linuxdocker= -if [[ $(uname) == Linux && -z ${DOCKER_HOST:-} ]]; then - using_local_linuxdocker=1 -fi - -EMBEDDED_CONFIG=y;DOWNLOAD_KUBECTL=y;DIND_K8S_VERSION=v1.12;DIND_IMAGE_DIGEST=sha256:a6534a4425b0427c32f420a2d38c09964327c5515da4f5620f82876507cc8afd;DIND_COMMIT=596f7d093470c1dc3a3e4466bcdfb34438a99b90 - -# dind::localhost provides the local host IP based on the address family used for service subnet. -function dind::localhost() { - if [[ ${SERVICE_NET_MODE} = "ipv6" ]]; then - echo '[::1]' - else - echo '127.0.0.1' - fi -} - -# dind::family-for indicates whether the CIDR or IP is for an IPv6 or IPv4 family. -function dind::family-for { - local addr=$1 - if [[ "$addr" = *":"* ]]; then - echo "ipv6" - else - echo "ipv4" - fi -} - -# dind::cluster-suffix builds a suffix used for resources, based on the DIND_LABEL. -function dind::cluster-suffix { - if [ "$DIND_LABEL" != "$DEFAULT_DIND_LABEL" ]; then - echo "-${DIND_LABEL}" - else - echo '' - fi -} - -function dind::net-name { - echo "kubeadm-dind-net$( dind::cluster-suffix )" -} - -# dind::add-cluster will inject the cluster ID to the IP address. For IPv4, it is -# assumed that the IP is a /24 with the third part of the address available for cluster ID. -# For IPv6, it is assumed that there is enough space for the cluster to be added, and the -# cluster ID will be added to the 16 bits before the double colon. For example: -# -# 10.192.0.0/24 => 10.192.5.0/24 -# fd00:77:20::/64 => fd00:77:20:5::/64 -# -# This function is intended to be used for management networks. -# -# TODO: Validate that there is enough space for cluster ID. -# TODO: For IPv6 could get fancy and handle case where cluster ID is placed in upper 8 bits of hextet -# TODO: Consider if want to do /16 for IPv4 management subnet. 
-# -function dind::add-cluster { - local cidr=$1 - local ip_mode=$2 - - if [[ ${ip_mode} = "ipv4" ]]; then - echo ${cidr} | sed "s/^\([0-9]*\.[0-9]*\.\).*\/24$/\1${CLUSTER_ID}.0\/24/" - else # IPv6 - echo ${cidr} | sed "s/^\(.*\)\(\:\:\/[0-9]*\)$/\1:${CLUSTER_ID}\2/" - fi -} - -# dind::get-and-validate-cidrs takes a list of CIDRs and validates them based on the ip -# mode, returning them. For IPv4 only and IPv6 only modes, only one CIDR is expected. For -# dual stack, two CIDRS are expected. It verifies that the CIDRs are the right family and -# will use the provided defaults, when CIDRs are missing. For dual-stack, the IPv4 address -# will be first. -# -# For the management network, the cluster ID will be injected into the CIDR. Also, if no -# MGMT_CIDRS value is specified, but the legacy DIND_SUBNET/DIND_SUBNET_SIZE is provided, -# that will be used for the (first) CIDR. -# -# NOTE: It is expected that the CIDR size is /24 for IPv4 management networks. -# -# For pod CIDRs, the size will be increased by 8, to leave room for the node ID to be -# injected into the address. -# -# NOTE: For IPv4, the pod size is expected to be /16 -> /24 in usage. -# -function dind::get-and-validate-cidrs { - IFS=', ' read -r -a cidrs <<< "$1" - IFS=', ' read -r -a defaults <<< "$2" - local is_mgmt=$3 - case ${IP_MODE} in - ipv4) - case ${#cidrs[@]} in - 0) - cidrs[0]="${defaults[0]}" - ;; - 1) - ;; - *) - echo "ERROR! More than one CIDR provided '$1'" - exit 1 - ;; - esac - if [[ $( dind::family-for "${cidrs[0]}" ) != "ipv4" ]]; then - echo "ERROR! CIDR must be IPv4 value" - exit 1 - fi - if [[ ${is_mgmt} = true ]]; then - cidrs[0]="$( dind::add-cluster "${cidrs[0]}" "${IP_MODE}" )" - fi - ;; - - ipv6) - case ${#cidrs[@]} in - 0) - cidrs[0]="${defaults[0]}" - ;; - 1) - ;; - *) - echo "ERROR! More than one CIDR provided '$1'" - exit 1 - ;; - esac - if [[ $( dind::family-for "${cidrs[0]}" ) != "ipv6" ]]; then - echo "ERROR! CIDR must be IPv6 value" - exit 1 - fi - if [[ ${is_mgmt} = true ]]; then - cidrs[0]="$( dind::add-cluster "${cidrs[0]}" "${IP_MODE}" )" - fi - ;; - - dual-stack) - case ${#cidrs[@]} in - 0) - cidrs[0]="${defaults[0]}" - cidrs[1]="${defaults[1]}" - ;; - 1) - if [[ $( dind::family-for "${cidrs[0]}" ) = "ipv6" ]]; then - cidrs[1]=${cidrs[0]} - cidrs[0]="${defaults[0]}" # Assuming first default is a V4 address - else - cidrs[1]="${defaults[1]}" - fi - ;; - 2) - # Force ordering to have V4 address first - if [[ $( dind::family-for "${cidrs[0]}" ) = "ipv6" ]]; then - local temp=${cidrs[0]} - cidrs[0]=${cidrs[1]} - cidrs[1]=${temp} - fi - ;; - *) - echo "ERROR! More than two CIDRs provided '$1'" - exit 1 - ;; - esac - local have_v4="" - local have_v6="" - for cidr in ${cidrs[@]}; do - if [[ $( dind::family-for "${cidr}" ) = "ipv6" ]]; then - have_v6=1 - else - have_v4=1 - fi - done - if [[ -z ${have_v4} ]]; then - echo "ERROR! Missing IPv4 CIDR in '$1'" - exit 1 - fi - if [[ -z ${have_v6} ]]; then - echo "ERROR! Missing IPv6 CIDR in '$1'" - exit 1 - fi - if [[ ${is_mgmt} = true ]]; then - cidrs[0]="$( dind::add-cluster "${cidrs[0]}" "${IP_MODE}" )" - cidrs[1]="$( dind::add-cluster "${cidrs[1]}" "${IP_MODE}" )" - fi - ;; - esac - echo "${cidrs[@]}" -} - -# dind::make-ip-from-cidr strips off the slash and size, and appends the -# interface part to the prefix to form an IP. For IPv4, it strips off the -# fourth part of the prefix, so that it can be replaced. It assumes that the -# resulting prefix will be of sufficient size. 
It also will use hex for the -# appended part for IPv6, and decimal for IPv4. -# -# fd00:20::/64 -> fd00:20::a -# 10.96.0.0/12 -> 10.96.0.10 -# -function dind::make-ip-from-cidr { - prefix="$(echo $1 | sed 's,/.*,,')" - if [[ $( dind::family-for ${prefix} ) == "ipv4" ]]; then - printf "%s%d" $( echo ${prefix} | sed 's/0$//' ) $2 - else - printf "%s%x" ${prefix} $2 - fi -} - -# dind::add-cluster-id-and-validate-nat64-prefix will modify the IPv4 mapping -# subnet prefix, by adding the cluster ID (default 0) to the second octet. -# It will produce an error, if the prefix is not in the 10.0.0.0/8 or -# 172.16.0.0/12 private networks. -function dind::add-cluster-id-and-validate-nat64-prefix { - local parts - IFS="." read -a parts <<<${NAT64_V4_SUBNET_PREFIX} - if [[ ${#parts[@]} -ne 2 ]]; then - echo "ERROR! NAT64_V4_SUBNET_PREFIX must be two octets (have '${NAT64_V4_SUBNET_PREFIX}')" - exit 1 - fi - (( parts[1]+=${CLUSTER_ID} )) - NAT64_V4_SUBNET_PREFIX="${parts[0]}.${parts[1]}" - echo "Added cluster ID offset (${CLUSTER_ID}) to NAT64_V4_SUBNET_PREFIX giving prefix '${NAT64_V4_SUBNET_PREFIX}'" - if [[ ${parts[0]} -eq 10 ]]; then - if [[ ${parts[1]} > 253 ]]; then - echo "ERROR! NAT64_V4_SUBNET_PREFIX is too large for 10.0.0.0/8 private net" - exit 1 - fi - elif [[ ${parts[0]} -eq 172 ]]; then - if [[ ${parts[1]} -lt 16 || ${parts[1]} -gt 31 ]]; then - echo "ERROR! NAT64_V4_SUBNET_PREFIX is outside of range for 172.16.0.0/12 private net" - exit 1 - fi - else - echo "ERROR! NAT64_V4_SUBNET_PREFIX is not in 10.0.0.0/8 or 172.16.0.0/12 private networks" - exit 1 - fi - echo "Using NAT64 V4 mapping network prefix: ${NAT64_V4_SUBNET_PREFIX}" -} - - -# START OF PROCESSING... - -IP_MODE="${IP_MODE:-ipv4}" # ipv4, ipv6, dual-stack -# FUTURE: Once dual-stack support is released, check K8s version, and reject for older versions. -if [[ ! ${EMBEDDED_CONFIG:-} ]]; then - source "${DIND_ROOT}/config.sh" -fi - -# Multicluster support -# Users can specify a cluster ID number from 1..254, represented as a string. -# This will be used to form resource names "cluster-#", and will be used in the -# management subnet to give unique networks for each cluster. If the cluster ID -# is not specified, or zero, it will be considered a single cluster or the first -# in the multi-cluster. This is the recommended usage. -# -# For legacy support, the user can specify DIND_LABEL, which will be used in the -# resource names. If a cluster ID is specified (a hybrid case, where people are -# using the new method, but want custom names), the resourse name will have the -# suffix "-#" with the cluster ID. If no cluster ID is specified (for backward -# compatibility), then the resource name will be just the DIND_LABEL, and a pseudo- -# random number from 1..13 will be generated for the cluster ID to be used in -# management network. The range is limited, because, in IPv6 mode, the cluster ID -# is used in the NAT64 V4 subnet prefix, which must be in a private network. -# The default is 172.18, so the cluster ID cannot be larger than 13 to guarantee -# a valid value. -# -# To get around that limitation, you can set the cluster ID, in addition to the -# DIND_LABEL, and optionally, change the NAT64_V4_SUBNET_PREFIX value. 
-# -DEFAULT_DIND_LABEL='mirantis.kubeadm_dind_cluster_runtime' -if [[ -z ${DIND_LABEL+x} ]]; then # No legacy DIND_LABEL set - if [[ -z ${CLUSTER_ID+x} ]]; then # No cluster ID set - DIND_LABEL=${DEFAULT_DIND_LABEL} # Single cluster mode - CLUSTER_ID="0" - else # Have cluster ID - if [[ ${CLUSTER_ID} = "0" ]]; then - DIND_LABEL=${DEFAULT_DIND_LABEL} # Single cluster mode or first cluster of multi-cluster - else - DIND_LABEL="cluster-${CLUSTER_ID}" # Multi-cluster - fi - fi -else # Legacy DIND_LABEL set for multi-cluster - if [[ -z ${CLUSTER_ID+x} ]]; then # No cluster ID set, make one from 1..13, but don't use in resource names - CLUSTER_ID="$(( ($RANDOM % 12) + 1 ))" - else - if [[ ${CLUSTER_ID} = "0" ]]; then - CLUSTER_ID="$(( ($RANDOM % 12) + 1 ))" # Force a pseudo-random cluster for additional legacy cluster - else - DIND_LABEL="${DIND_LABEL}-${CLUSTER_ID}" - fi - fi -fi - -CNI_PLUGIN="${CNI_PLUGIN:-bridge}" -GCE_HOSTED="${GCE_HOSTED:-}" -DIND_ALLOW_AAAA_USE="${DIND_ALLOW_AAAA_USE:-}" # Default is to use DNS64 always for IPv6 mode -KUBE_ROUTER_VERSION="${KUBE_ROUTER_VERSION:-v0.2.0}" - -# Use legacy DIND_SUBNET/DIND_SUBNET_SIZE, only if MGMT_CIDRS is not set. -legacy_mgmt_cidr="" -if [[ ${DIND_SUBNET:-} && ${DIND_SUBNET_SIZE:-} ]]; then - legacy_mgmt_cidr="${DIND_SUBNET}/${DIND_SUBNET_SIZE}" -fi - -if [[ ${IP_MODE} = "dual-stack" ]]; then - mgmt_net_defaults="10.192.0.0/24, fd00:20::/64" - - KUBE_RSYNC_ADDR="${KUBE_RSYNC_ADDR:-::1}" - SERVICE_CIDR="${SERVICE_CIDR:-fd00:30::/110}" # Will default to IPv6 service net family - - pod_net_defaults="10.244.0.0/16, fd00:40::/72" - - USE_HAIRPIN="${USE_HAIRPIN:-true}" # Default is to use hairpin for dual-stack - DIND_ALLOW_AAAA_USE=true # Forced, so can access external hosts via IPv6 - if [[ ${DIND_ALLOW_AAAA_USE} && ${GCE_HOSTED} ]]; then - echo "ERROR! GCE does not support use of IPv6 for external addresses - aborting." - exit 1 - fi -elif [[ ${IP_MODE} = "ipv6" ]]; then - mgmt_net_defaults="fd00:20::/64" - - KUBE_RSYNC_ADDR="${KUBE_RSYNC_ADDR:-::1}" - SERVICE_CIDR="${SERVICE_CIDR:-fd00:30::/110}" - - pod_net_defaults="fd00:40::/72" - - USE_HAIRPIN="${USE_HAIRPIN:-true}" # Default is to use hairpin for IPv6 - if [[ ${DIND_ALLOW_AAAA_USE} && ${GCE_HOSTED} ]]; then - echo "ERROR! GCE does not support use of IPv6 for external addresses - aborting." - exit 1 - fi -else # IPv4 mode - mgmt_net_defaults="10.192.0.0/24" - - KUBE_RSYNC_ADDR="${KUBE_RSYNC_ADDR:-127.0.0.1}" - SERVICE_CIDR="${SERVICE_CIDR:-10.96.0.0/12}" - - pod_net_defaults="10.244.0.0/16" - - USE_HAIRPIN="${USE_HAIRPIN:-false}" # Disabled for IPv4, as issue with Virtlet networking - if [[ ${DIND_ALLOW_AAAA_USE} ]]; then - echo "WARNING! The DIND_ALLOW_AAAA_USE option is for IPv6 mode - ignoring setting." 
- DIND_ALLOW_AAAA_USE= - fi - if [[ ${CNI_PLUGIN} = "calico" || ${CNI_PLUGIN} = "calico-kdd" ]]; then - pod_net_defaults="192.168.0.0/16" - fi -fi - -IFS=' ' read -r -a mgmt_net_cidrs <<<$( dind::get-and-validate-cidrs "${MGMT_CIDRS:-${legacy_mgmt_cidr}}" "${mgmt_net_defaults[@]}" true ) - -REMOTE_DNS64_V4SERVER="${REMOTE_DNS64_V4SERVER:-8.8.8.8}" -if [[ ${IP_MODE} == "ipv6" ]]; then - # Uses local DNS64 container - dns_server="$( dind::make-ip-from-cidr ${mgmt_net_cidrs[0]} 0x100 )" - DNS64_PREFIX="${DNS64_PREFIX:-fd00:10:64:ff9b::}" - DNS64_PREFIX_SIZE="${DNS64_PREFIX_SIZE:-96}" - DNS64_PREFIX_CIDR="${DNS64_PREFIX}/${DNS64_PREFIX_SIZE}" - - LOCAL_NAT64_SERVER="$( dind::make-ip-from-cidr ${mgmt_net_cidrs[0]} 0x200 )" - NAT64_V4_SUBNET_PREFIX="${NAT64_V4_SUBNET_PREFIX:-172.18}" - dind::add-cluster-id-and-validate-nat64-prefix -else - dns_server="${REMOTE_DNS64_V4SERVER}" -fi - -SERVICE_NET_MODE="$( dind::family-for ${SERVICE_CIDR} )" -DNS_SVC_IP="$( dind::make-ip-from-cidr ${SERVICE_CIDR} 10 )" - -ETCD_HOST="${ETCD_HOST:-$( dind::localhost )}" - -IFS=' ' read -r -a pod_net_cidrs <<<$( dind::get-and-validate-cidrs "${POD_NETWORK_CIDR:-}" "${pod_net_defaults[@]}" false ) - -declare -a pod_prefixes -declare -a pod_sizes -# Extract the prefix and size from the provided pod CIDR(s), based on the IP mode of each. The -# size will be increased by 8, to make room for the node ID to be added to the prefix later. -# Bridge and PTP plugins can process IPv4 and IPv6 pod CIDRs, other plugins must be IPv4 only. -for pod_cidr in "${pod_net_cidrs[@]}"; do - if [[ $( dind::family-for "${pod_cidr}" ) = "ipv4" ]]; then - actual_size=$( echo ${pod_cidr} | sed 's,.*/,,' ) - if [[ ${actual_size} -ne 16 ]]; then - echo "ERROR! For IPv4 CIDRs, the size must be /16. Have '${pod_cidr}'" - exit 1 - fi - pod_sizes+=( 24 ) - pod_prefixes+=( "$(echo ${pod_cidr} | sed 's/^\([0-9]*\.[0-9]*\.\).*/\1/')" ) - else # IPv6 - if [[ ${CNI_PLUGIN} != "bridge" && ${CNI_PLUGIN} != "ptp" ]]; then - echo "ERROR! IPv6 pod networks are only supported by bridge and PTP CNI plugins" - exit 1 - fi - # There are several cases to address. First, is normal split of prefix and size: - # fd00:10:20:30::/64 ---> fd00:10:20:30: /72 - # - # Second, is when the prefix needs to be padded, so that node ID can be added later: - # fd00:10::/64 ---> fd00:10:0:0: /72 - # - # Third, is when the low order part of the address, must be removed for the prefix, - # as the node ID will be placed in the lower byte: - # fd00:10:20:30:4000::/72 ---> fd00:10:20:30:40 /80 - # - # We will attempt to check for three error cases. One is when the address part is - # way too big for the size specified: - # fd00:10:20:30:40::/48 ---> fd00:10:20: /56 desired, but conflict with 30:40: - # - # Another is when the address part, once trimmed for the size, would loose info: - # fd00:10:20:1234::/56 ---> fd00:10:20:12 /64, but lost 34:, which conflicts - # - # Lastly, again, trimming would leave high byte in hextet, conflicting with - # the node ID: - # fd00:10:20:30:1200::/64 ---> fd00:10:20:30:12 /72, but 12 conflicts - # - # Note: later, the node ID will be appended to the prefix generated. - # - cluster_size="$(echo ${pod_cidr} | sed 's,.*::/,,')" - pod_sizes+=( $((${cluster_size}+8)) ) - - pod_prefix="$(echo ${pod_cidr} | sed 's,::/.*,:,')" - num_colons="$(grep -o ":" <<< "${pod_prefix}" | wc -l)" - need_zero_pads=$((${cluster_size}/16)) - - if [[ ${num_colons} -gt $((need_zero_pads + 1)) ]]; then - echo "ERROR! 
Address part of CIDR (${pod_prefix}) is too large for /${cluster_size}" - exit 1 - fi - if [[ ${num_colons} -gt ${need_zero_pads} ]]; then - # Will be replacing lowest byte with node ID, so pull off lower byte and colon - if [[ ${pod_prefix: -3} != "00:" ]]; then # last byte is not zero - echo "ERROR! Cannot trim address part of CIDR (${pod_prefix}) to fit in /${cluster_size}" - exit 1 - fi - pod_prefix=${pod_prefix::-3} - if [[ $(( ${cluster_size} % 16 )) -eq 0 && $( ${pod_prefix: -1} ) != ":" ]]; then # should not be upper byte for this size CIDR - echo "ERROR! Trimmed address part of CIDR (${pod_prefix}) is still too large for /${cluster_size}" - exit 1 - fi - fi - # Add in zeros to pad 16 bits at a time, up to the padding needed, which is - # need_zero_pads - num_colons. - while [ ${num_colons} -lt ${need_zero_pads} ]; do - pod_prefix+="0:" - ((num_colons++)) - done - pod_prefixes+=( "${pod_prefix}" ) - fi -done - -DIND_IMAGE_BASE="${DIND_IMAGE_BASE:-mirantis/kubeadm-dind-cluster}" -if [[ ${DIND_COMMIT:-} ]]; then - if [[ ${DIND_COMMIT} = current ]]; then - DIND_COMMIT="$(cd "${DIND_ROOT}"; git rev-parse HEAD)" - fi - DIND_K8S_VERSION="${DIND_K8S_VERSION:-v1.13}" - DIND_IMAGE="${DIND_IMAGE_BASE}:${DIND_COMMIT}-${DIND_K8S_VERSION}" -else - DIND_IMAGE="${DIND_IMAGE:-${DIND_IMAGE_BASE}:local}" -fi -if [[ ${DIND_IMAGE_DIGEST:-} ]]; then - DIND_IMAGE="${DIND_IMAGE}@${DIND_IMAGE_DIGEST}" -fi - -BUILD_KUBEADM="${BUILD_KUBEADM:-}" -BUILD_HYPERKUBE="${BUILD_HYPERKUBE:-}" -if [[ ! -z ${DIND_K8S_BIN_DIR:-} ]]; then - BUILD_KUBEADM="" - BUILD_HYPERKUBE="" -fi -KUBEADM_SOURCE="${KUBEADM_SOURCE-}" -HYPERKUBE_SOURCE="${HYPERKUBE_SOURCE-}" -NUM_NODES=${NUM_NODES:-2} -EXTRA_PORTS="${EXTRA_PORTS:-}" -KUBECTL_DIR="${KUBECTL_DIR:-${HOME}/.kubeadm-dind-cluster}" -DASHBOARD_URL="${DASHBOARD_URL:-https://rawgit.com/kubernetes/dashboard/bfab10151f012d1acc5dfb1979f3172e2400aa3c/src/deploy/kubernetes-dashboard.yaml}" -SKIP_SNAPSHOT="${SKIP_SNAPSHOT:-}" -E2E_REPORT_DIR="${E2E_REPORT_DIR:-}" -DIND_NO_PARALLEL_E2E="${DIND_NO_PARALLEL_E2E:-}" -DNS_SERVICE="${DNS_SERVICE:-coredns}" -DIND_STORAGE_DRIVER="${DIND_STORAGE_DRIVER:-overlay2}" - -DIND_CA_CERT_URL="${DIND_CA_CERT_URL:-}" -DIND_PROPAGATE_HTTP_PROXY="${DIND_PROPAGATE_HTTP_PROXY:-}" -DIND_HTTP_PROXY="${DIND_HTTP_PROXY:-}" -DIND_HTTPS_PROXY="${DIND_HTTPS_PROXY:-}" -DIND_NO_PROXY="${DIND_NO_PROXY:-}" - -DIND_DAEMON_JSON_FILE="${DIND_DAEMON_JSON_FILE:-/etc/docker/daemon.json}" # can be set to /dev/null -DIND_REGISTRY_MIRROR="${DIND_REGISTRY_MIRROR:-}" # plain string format -DIND_INSECURE_REGISTRIES="${DIND_INSECURE_REGISTRIES:-}" # json list format - -FEATURE_GATES="${FEATURE_GATES:-MountPropagation=true}" -# you can set special value 'none' not to set any kubelet's feature gates. -KUBELET_FEATURE_GATES="${KUBELET_FEATURE_GATES:-MountPropagation=true,DynamicKubeletConfig=true}" - -ENABLE_CEPH="${ENABLE_CEPH:-}" - -DIND_CRI="${DIND_CRI:-docker}" -case "${DIND_CRI}" in - docker) - CRI_SOCKET=/var/run/dockershim.sock - ;; - containerd) - CRI_SOCKET=/var/run/containerd/containerd.sock - ;; - *) - echo >&2 "Bad DIND_CRI. 
Please specify 'docker' or 'containerd'" - ;; -esac - -# TODO: Test multi-cluster for IPv6, before enabling -if [[ "${DIND_LABEL}" != "${DEFAULT_DIND_LABEL}" && "${IP_MODE}" == 'dual-stack' ]]; then - echo "Multiple parallel clusters currently not supported for dual-stack mode" >&2 - exit 1 -fi - -# not configurable for now, would need to setup context for kubectl _inside_ the cluster -readonly INTERNAL_APISERVER_PORT=8080 - -function dind::need-source { - if [[ ! -f cluster/kubectl.sh ]]; then - echo "$0 must be called from the Kubernetes repository root directory" 1>&2 - exit 1 - fi -} - -build_tools_dir="build" -use_k8s_source=y -if [[ ! ${BUILD_KUBEADM} && ! ${BUILD_HYPERKUBE} ]]; then - use_k8s_source= -fi -if [[ ${use_k8s_source} ]]; then - dind::need-source - kubectl=cluster/kubectl.sh - if [[ ! -f ${build_tools_dir}/common.sh ]]; then - build_tools_dir="build-tools" - fi -else - if [[ ! ${DOWNLOAD_KUBECTL:-} ]] && ! hash kubectl 2>/dev/null; then - echo "You need kubectl binary in your PATH to use prebuilt DIND image" 1>&2 - exit 1 - fi - kubectl=kubectl -fi - -function dind::retry { - # based on retry function in hack/jenkins/ scripts in k8s source - for i in {1..10}; do - "$@" && return 0 || sleep ${i} - done - "$@" -} - -busybox_image="busybox:1.26.2" -e2e_base_image="golang:1.10.5" -sys_volume_args=() -build_volume_args=() - -function dind::set-build-volume-args { - if [ ${#build_volume_args[@]} -gt 0 ]; then - return 0 - fi - build_container_name= - if [ -n "${KUBEADM_DIND_LOCAL:-}" ]; then - build_volume_args=(-v "$PWD:/go/src/k8s.io/kubernetes") - else - build_container_name="$(KUBE_ROOT=${PWD} ETCD_HOST=${ETCD_HOST} && - . ${build_tools_dir}/common.sh && - kube::build::verify_prereqs >&2 && - echo "${KUBE_DATA_CONTAINER_NAME:-${KUBE_BUILD_DATA_CONTAINER_NAME}}")" - build_volume_args=(--volumes-from "${build_container_name}") - fi -} - -function dind::volume-exists { - local name="$1" - if docker volume inspect "${name}" >& /dev/null; then - return 0 - fi - return 1 -} - -function dind::create-volume { - local name="$1" - docker volume create --label "${DIND_LABEL}" --name "${name}" >/dev/null -} - -# We mount /boot and /lib/modules into the container -# below to in case some of the workloads need them. -# This includes virtlet, for instance. Also this may be -# useful in future if we want DIND nodes to pass -# preflight checks. -# Unfortunately we can't do this when using Mac Docker -# (unless a remote docker daemon on Linux is used) -# NB: there's no /boot on recent Mac dockers -function dind::prepare-sys-mounts { - if [[ ! ${using_linuxkit} ]]; then - sys_volume_args=() - if [[ -d /boot ]]; then - sys_volume_args+=(-v /boot:/boot) - fi - if [[ -d /lib/modules ]]; then - sys_volume_args+=(-v /lib/modules:/lib/modules) - fi - return 0 - fi - local dind_sys_vol_name - dind_sys_vol_name="kubeadm-dind-sys$( dind::cluster-suffix )" - if ! dind::volume-exists "$dind_sys_vol_name"; then - dind::step "Saving a copy of docker host's /lib/modules" - dind::create-volume "$dind_sys_vol_name" - # Use a dirty nsenter trick to fool Docker on Mac and grab system - # /lib/modules into sys.tar file on kubeadm-dind-sys volume. 
- local nsenter="nsenter --mount=/proc/1/ns/mnt --" - docker run \ - --rm \ - --privileged \ - -v "$dind_sys_vol_name":/dest \ - --pid=host \ - "${busybox_image}" \ - /bin/sh -c \ - "if ${nsenter} test -d /lib/modules; then ${nsenter} tar -C / -c lib/modules >/dest/sys.tar; fi" - fi - sys_volume_args=(-v "$dind_sys_vol_name":/dind-sys) -} - -tmp_containers=() - -function dind::cleanup { - if [ ${#tmp_containers[@]} -gt 0 ]; then - for name in "${tmp_containers[@]}"; do - docker rm -vf "${name}" 2>/dev/null - done - fi -} - -trap dind::cleanup EXIT - -function dind::check-image { - local name="$1" - if docker inspect --format 'x' "${name}" >&/dev/null; then - return 0 - else - return 1 - fi -} - -function dind::filter-make-output { - # these messages make output too long and make Travis CI choke - egrep -v --line-buffered 'I[0-9][0-9][0-9][0-9] .*(parse|conversion|defaulter|deepcopy)\.go:[0-9]+\]' -} - -function dind::run-build-command { - # this is like build/run.sh, but it doesn't rsync back the binaries, - # only the generated files. - local cmd=("$@") - ( - # The following is taken from build/run.sh and build/common.sh - # of Kubernetes source tree. It differs in - # --filter='+ /_output/dockerized/bin/**' - # being removed from rsync - . ${build_tools_dir}/common.sh - kube::build::verify_prereqs - kube::build::build_image - kube::build::run_build_command "$@" - - kube::log::status "Syncing out of container" - - kube::build::start_rsyncd_container - - local rsync_extra="" - if (( ${KUBE_VERBOSE} >= 6 )); then - rsync_extra="-iv" - fi - - # The filter syntax for rsync is a little obscure. It filters on files and - # directories. If you don't go in to a directory you won't find any files - # there. Rules are evaluated in order. The last two rules are a little - # magic. '+ */' says to go in to every directory and '- /**' says to ignore - # any file or directory that isn't already specifically allowed. - # - # We are looking to copy out all of the built binaries along with various - # generated files. 
- kube::build::rsync \ - --filter='- /vendor/' \ - --filter='- /_temp/' \ - --filter='+ zz_generated.*' \ - --filter='+ generated.proto' \ - --filter='+ *.pb.go' \ - --filter='+ types.go' \ - --filter='+ */' \ - --filter='- /**' \ - "rsync://k8s@${KUBE_RSYNC_ADDR}/k8s/" "${KUBE_ROOT}" - - kube::build::stop_rsyncd_container - ) -} - -function dind::make-for-linux { - local copy="$1" - shift - dind::step "Building binaries:" "$*" - if [ -n "${KUBEADM_DIND_LOCAL:-}" ]; then - dind::step "+ make WHAT=\"$*\"" - make WHAT="$*" 2>&1 | dind::filter-make-output - elif [ "${copy}" = "y" ]; then - dind::step "+ ${build_tools_dir}/run.sh make WHAT=\"$*\"" - "${build_tools_dir}/run.sh" make WHAT="$*" 2>&1 | dind::filter-make-output - else - dind::step "+ [using the build container] make WHAT=\"$*\"" - dind::run-build-command make WHAT="$*" 2>&1 | dind::filter-make-output - fi -} - -function dind::check-binary { - local filename="$1" - local dockerized="_output/dockerized/bin/linux/amd64/${filename}" - local plain="_output/local/bin/linux/amd64/${filename}" - dind::set-build-volume-args - # FIXME: don't hardcode amd64 arch - if [ -n "${KUBEADM_DIND_LOCAL:-${force_local:-}}" ]; then - if [ -f "${dockerized}" -o -f "${plain}" ]; then - return 0 - fi - elif docker run --rm "${build_volume_args[@]}" \ - "${busybox_image}" \ - test -f "/go/src/k8s.io/kubernetes/${dockerized}" >&/dev/null; then - return 0 - fi - return 1 -} - -function dind::ensure-downloaded-kubectl { - local kubectl_url - local kubectl_sha1 - local kubectl_sha1_linux - local kubectl_sha1_darwin - local kubectl_link - local kubectl_os - - if [[ ! ${DOWNLOAD_KUBECTL:-} ]]; then - return 0 - fi - - export PATH="${KUBECTL_DIR}:$PATH" - - eval "$(docker run --entrypoint /bin/bash --rm "${DIND_IMAGE}" -c "cat /dind-env")" - - if [ $(uname) = Darwin ]; then - kubectl_sha1="${KUBECTL_DARWIN_SHA1}" - kubectl_url="${KUBECTL_DARWIN_URL}" - else - kubectl_sha1="${KUBECTL_LINUX_SHA1}" - kubectl_url="${KUBECTL_LINUX_URL}" - fi - local link_target="kubectl-${KUBECTL_VERSION}" - local link_name="${KUBECTL_DIR}"/kubectl - if [[ -h "${link_name}" && "$(readlink "${link_name}")" = "${link_target}" ]]; then - return 0 - fi - - local path="${KUBECTL_DIR}/${link_target}" - if [[ ! -f "${path}" ]]; then - mkdir -p "${KUBECTL_DIR}" - curl -sSLo "${path}" "${kubectl_url}" - echo "${kubectl_sha1} ${path}" | sha1sum -c - chmod +x "${path}" - fi - - ln -fs "${link_target}" "${KUBECTL_DIR}/kubectl" -} - -function dind::ensure-kubectl { - if [[ ! ${use_k8s_source} ]]; then - # already checked on startup - dind::ensure-downloaded-kubectl - return 0 - fi - if [ $(uname) = Darwin ]; then - if [ ! -f _output/local/bin/darwin/amd64/kubectl ]; then - dind::step "Building kubectl" - dind::step "+ make WHAT=cmd/kubectl" - make WHAT=cmd/kubectl 2>&1 | dind::filter-make-output - fi - elif ! force_local=y dind::check-binary kubectl; then - dind::make-for-linux y cmd/kubectl - fi -} - -function dind::ensure-binaries { - local -a to_build=() - for name in "$@"; do - if ! dind::check-binary "$(basename "${name}")"; then - to_build+=("${name}") - fi - done - if [ "${#to_build[@]}" -gt 0 ]; then - dind::make-for-linux n "${to_build[@]}" - fi - return 0 -} - -# dind::ensure-network creates the management network for the cluster. For IPv4 -# only it will have the management network CIDR. For IPv6 only, it will have -# the IPv6 management network CIDR and the NAT64 V4 mapping network CIDR. For -# dual stack, it will have the IPv4 and IPv6 management CIDRs. 
Each of the -# management networks (not the NAT64 network) will have a gateway specified. -# -function dind::ensure-network { - if ! docker network inspect $(dind::net-name) >&/dev/null; then - local -a args - for cidr in "${mgmt_net_cidrs[@]}"; do - if [[ $( dind::family-for ${cidr} ) = "ipv6" ]]; then - args+=(--ipv6) - fi - args+=(--subnet="${cidr}") - local gw=$( dind::make-ip-from-cidr ${cidr} 1 ) - args+=(--gateway="${gw}") - done - if [[ ${IP_MODE} = "ipv6" ]]; then - # Need second network for NAT64 V4 mapping network - args+=(--subnet=${NAT64_V4_SUBNET_PREFIX}.0.0/16) - fi - docker network create ${args[@]} $(dind::net-name) >/dev/null - fi -} - -function dind::ensure-volume { - local reuse_volume= - if [[ $1 = -r ]]; then - reuse_volume=1 - shift - fi - local name="$1" - if dind::volume-exists "${name}"; then - if [[ ! ${reuse_volume} ]]; then - docker volume rm "${name}" >/dev/null - fi - fi - dind::create-volume "${name}" -} - -function dind::ensure-dns { - if [[ ${IP_MODE} = "ipv6" ]]; then - local dns64_name="bind9$( dind::cluster-suffix )" - if ! docker inspect ${dns64_name} >&/dev/null; then - local force_dns64_for="" - if [[ ! ${DIND_ALLOW_AAAA_USE} ]]; then - # Normally, if have an AAAA record, it is used. This clause tells - # bind9 to do ignore AAAA records for the specified networks - # and/or addresses and lookup A records and synthesize new AAAA - # records. In this case, we select "any" networks that have AAAA - # records meaning we ALWAYS use A records and do NAT64. - force_dns64_for="exclude { any; };" - fi - read -r -d '' bind9_conf </named.conf && named -c /named.conf -g -u named' >/dev/null - ipv4_addr="$(docker exec ${dns64_name} ip addr list eth0 | grep "inet" | awk '$1 == "inet" {print $2}')" - docker exec ${dns64_name} ip addr del ${ipv4_addr} dev eth0 - docker exec ${dns64_name} ip -6 route add ${DNS64_PREFIX_CIDR} via ${LOCAL_NAT64_SERVER} - fi - fi -} - -function dind::ensure-nat { - if [[ ${IP_MODE} = "ipv6" ]]; then - local nat64_name="tayga$( dind::cluster-suffix )" - if ! 
docker ps | grep ${nat64_name} >&/dev/null; then - docker run -d --name ${nat64_name} --hostname ${nat64_name} --net "$(dind::net-name)" --label "dind-support$( dind::cluster-suffix )" \ - --sysctl net.ipv6.conf.all.disable_ipv6=0 --sysctl net.ipv6.conf.all.forwarding=1 \ - --privileged=true --ip ${NAT64_V4_SUBNET_PREFIX}.0.200 --ip6 ${LOCAL_NAT64_SERVER} --dns ${REMOTE_DNS64_V4SERVER} --dns ${dns_server} \ - -e TAYGA_CONF_PREFIX=${DNS64_PREFIX_CIDR} -e TAYGA_CONF_IPV4_ADDR=${NAT64_V4_SUBNET_PREFIX}.0.200 \ - -e TAYGA_CONF_DYNAMIC_POOL=${NAT64_V4_SUBNET_PREFIX}.0.128/25 danehans/tayga:latest >/dev/null - # Need to check/create, as "clean" may remove route - local route="$(ip route | egrep "^${NAT64_V4_SUBNET_PREFIX}.0.128/25")" - if [[ -z "${route}" ]]; then - docker run --net=host --rm --privileged ${busybox_image} ip route add ${NAT64_V4_SUBNET_PREFIX}.0.128/25 via ${NAT64_V4_SUBNET_PREFIX}.0.200 - fi - fi - fi -} - -function dind::run { - local reuse_volume= - if [[ $1 = -r ]]; then - reuse_volume="-r" - shift - fi - local container_name="${1:-}" - local node_id=${2:-0} - local portforward="${3:-}" - if [[ $# -gt 3 ]]; then - shift 3 - else - shift $# - fi - - local -a opts=("$@") - local ip_mode="--ip" - for cidr in "${mgmt_net_cidrs[@]}"; do - if [[ $( dind::family-for ${cidr} ) = "ipv6" ]]; then - ip_mode="--ip6" - fi - opts+=("${ip_mode}" "$( dind::make-ip-from-cidr ${cidr} $((${node_id}+1)) )") - done - opts+=("$@") - - local -a args=("systemd.setenv=CNI_PLUGIN=${CNI_PLUGIN}") - args+=("systemd.setenv=IP_MODE=${IP_MODE}") - args+=("systemd.setenv=DIND_STORAGE_DRIVER=${DIND_STORAGE_DRIVER}") - args+=("systemd.setenv=DIND_CRI=${DIND_CRI}") - - if [[ ${IP_MODE} != "ipv4" ]]; then - opts+=(--sysctl net.ipv6.conf.all.disable_ipv6=0) - opts+=(--sysctl net.ipv6.conf.all.forwarding=1) - fi - - if [[ ${IP_MODE} = "ipv6" ]]; then - opts+=(--dns ${dns_server}) - args+=("systemd.setenv=DNS64_PREFIX_CIDR=${DNS64_PREFIX_CIDR}") - args+=("systemd.setenv=LOCAL_NAT64_SERVER=${LOCAL_NAT64_SERVER}") - fi - - declare -a pod_nets - local i=0 - if [[ ${IP_MODE} = "ipv4" || ${IP_MODE} = "dual-stack" ]]; then - pod_nets+=("${pod_prefixes[$i]}${node_id}") - i=$((i+1)) - fi - if [[ ${IP_MODE} = "ipv6" || ${IP_MODE} = "dual-stack" ]]; then - # For prefix, if node ID will be in the upper byte, push it over - if [[ $((${pod_sizes[$i]} % 16)) -ne 0 ]]; then - n_id=$(printf "%02x00\n" "${node_id}") - else - if [[ "${pod_prefixes[$i]: -1}" = ":" ]]; then - n_id=$(printf "%x\n" "${node_id}") - else - n_id=$(printf "%02x\n" "${node_id}") # In lower byte, so ensure two chars - fi - fi - pod_nets+=("${pod_prefixes[$i]}${n_id}") - fi - - args+=("systemd.setenv=POD_NET_PREFIX=\"${pod_nets[0]}\"") - args+=("systemd.setenv=POD_NET_SIZE=\"${pod_sizes[0]}\"") - args+=("systemd.setenv=POD_NET2_PREFIX=\"${pod_nets[1]:-}\"") - args+=("systemd.setenv=POD_NET2_SIZE=\"${pod_sizes[1]:-}\"") - args+=("systemd.setenv=SERVICE_NET_MODE=${SERVICE_NET_MODE}") - args+=("systemd.setenv=USE_HAIRPIN=${USE_HAIRPIN}") - args+=("systemd.setenv=DNS_SVC_IP=${DNS_SVC_IP}") - args+=("systemd.setenv=DNS_SERVICE=${DNS_SERVICE}") - if [[ ! 
"${container_name}" ]]; then - echo >&2 "Must specify container name" - exit 1 - fi - - # remove any previously created containers with the same name - docker rm -vf "${container_name}" >&/dev/null || true - - if [[ "${portforward}" ]]; then - IFS=';' read -ra array <<< "${portforward}" - for element in "${array[@]}"; do - opts+=(-p "${element}") - done - fi - - opts+=(${sys_volume_args[@]+"${sys_volume_args[@]}"}) - - dind::step "Starting DIND container:" "${container_name}" - - if [[ ! -z ${DIND_K8S_BIN_DIR:-} ]]; then - opts+=(-v ${DIND_K8S_BIN_DIR}:/k8s) - fi - if [[ ! ${using_linuxkit} ]]; then - opts+=(-v /boot:/boot -v /lib/modules:/lib/modules) - fi - - if [[ ${ENABLE_CEPH} ]]; then - opts+=(-v /dev:/dev - -v /sys/bus:/sys/bus - -v /var/run/docker.sock:/opt/outer-docker.sock) - fi - - local volume_name="kubeadm-dind-${container_name}" - dind::ensure-network - dind::ensure-volume ${reuse_volume} "${volume_name}" - dind::ensure-nat - dind::ensure-dns - - # TODO: create named volume for binaries and mount it to /k8s - # in case of the source build - - # Start the new container. - docker run \ - -e IP_MODE="${IP_MODE}" \ - -e KUBEADM_SOURCE="${KUBEADM_SOURCE}" \ - -e HYPERKUBE_SOURCE="${HYPERKUBE_SOURCE}" \ - -d --privileged \ - --net "$(dind::net-name)" \ - --name "${container_name}" \ - --hostname "${container_name}" \ - -l "${DIND_LABEL}" \ - -v "${volume_name}:/dind" \ - ${opts[@]+"${opts[@]}"} \ - "${DIND_IMAGE}" \ - ${args[@]+"${args[@]}"} -} - -function dind::kubeadm { - local container_id="$1" - shift - dind::step "Running kubeadm:" "$*" - status=0 - # See image/bare/wrapkubeadm. - # Capturing output is necessary to grab flags for 'kubeadm join' - local -a env=(-e KUBELET_FEATURE_GATES="${KUBELET_FEATURE_GATES}" - -e DIND_CRI="${DIND_CRI}") - if ! docker exec "${env[@]}" "${container_id}" /usr/local/bin/wrapkubeadm "$@" 2>&1 | tee /dev/fd/2; then - echo "*** kubeadm failed" >&2 - return 1 - fi - return ${status} -} - -# function dind::bare { -# local container_name="${1:-}" -# if [[ ! 
"${container_name}" ]]; then -# echo >&2 "Must specify container name" -# exit 1 -# fi -# shift -# run_opts=(${@+"$@"}) -# dind::run "${container_name}" -# } - -function dind::configure-kubectl { - dind::step "Setting cluster config" - local host="$(dind::localhost)" - if [[ -z "$using_local_linuxdocker" ]]; then - host="127.0.0.1" - fi - local context_name cluster_name - context_name="$(dind::context-name)" - cluster_name="$(dind::context-name)" - "${kubectl}" config set-cluster "$cluster_name" \ - --server="http://${host}:$(dind::apiserver-port)" \ - --insecure-skip-tls-verify=true - "${kubectl}" config set-context "$context_name" --cluster="$cluster_name" - if [[ ${DIND_LABEL} = ${DEFAULT_DIND_LABEL} ]]; then - # Single cluster mode - "${kubectl}" config use-context "$context_name" - fi -} - -force_make_binaries= -function dind::set-master-opts { - master_opts=() - if [[ ${BUILD_KUBEADM} || ${BUILD_HYPERKUBE} ]]; then - # share binaries pulled from the build container between nodes - local dind_k8s_bin_vol_name - dind_k8s_bin_vol_name="dind-k8s-binaries$(dind::cluster-suffix)" - dind::ensure-volume -r "${dind_k8s_bin_vol_name}" - dind::set-build-volume-args - master_opts+=("${build_volume_args[@]}" -v "${dind_k8s_bin_vol_name}:/k8s") - local -a bins - if [[ ${BUILD_KUBEADM} ]]; then - master_opts+=(-e KUBEADM_SOURCE=build://) - bins+=(cmd/kubeadm) - else - master_opts+=(-e ${KUBEADM_SOURCE}) - fi - if [[ ${BUILD_HYPERKUBE} ]]; then - master_opts+=(-e HYPERKUBE_SOURCE=build://) - bins+=(cmd/hyperkube) - fi - if [[ ${force_make_binaries} ]]; then - dind::make-for-linux n "${bins[@]}" - else - dind::ensure-binaries "${bins[@]}" - fi - fi - if [[ ${MASTER_EXTRA_OPTS:-} ]]; then - master_opts+=( ${MASTER_EXTRA_OPTS} ) - fi -} - -function dind::ensure-dashboard-clusterrolebinding { - local ctx - ctx="$(dind::context-name)" - # 'create' may cause etcd timeout, yet create the clusterrolebinding. - # So use 'apply' to actually create it - "${kubectl}" --context "$ctx" create clusterrolebinding add-on-cluster-admin \ - --clusterrole=cluster-admin \ - --serviceaccount=kube-system:default \ - -o json --dry-run | - docker exec -i "$(dind::master-name)" jq '.apiVersion="rbac.authorization.k8s.io/v1beta1"|.kind|="ClusterRoleBinding"' | - "${kubectl}" --context "$ctx" apply -f - -} - -function dind::deploy-dashboard { - dind::step "Deploying k8s dashboard" - dind::retry "${kubectl}" --context "$(dind::context-name)" apply -f "${DASHBOARD_URL}" - # https://kubernetes-io-vnext-staging.netlify.com/docs/admin/authorization/rbac/#service-account-permissions - # Thanks @liggitt for the hint - dind::retry dind::ensure-dashboard-clusterrolebinding -} - -function dind::kubeadm-version { - if [[ ${use_k8s_source} ]]; then - (cluster/kubectl.sh version --short 2>/dev/null || true) | - grep Client | - sed 's/^.*: v\([0-9.]*\).*/\1/' - else - docker exec "$(dind::master-name)" \ - /bin/bash -c 'kubeadm version -o json | jq -r .clientVersion.gitVersion' | - sed 's/^v\([0-9.]*\).*/\1/' - fi -} - -function dind::kubeadm-version-at-least { - local major="${1}" - local minor="${2}" - if [[ ! 
( $(dind::kubeadm-version) =~ ^([0-9]+)\.([0-9]+) ) ]]; then - echo >&2 "WARNING: can't parse kubeadm version: $(dind::kubeadm-version)" - return 1 - fi - local act_major="${BASH_REMATCH[1]}" - local act_minor="${BASH_REMATCH[2]}" - if [[ ${act_major} -gt ${major} ]]; then - return 0 - fi - if [[ ${act_major} -lt ${major} ]]; then - return 1 - fi - if [[ ${act_minor} -ge ${minor} ]]; then - return 0 - fi - return 1 -} - -function dind::verify-image-compatibility { - # We can't tell in advance, if the image selected supports dual-stack, - # but will do the next best thing, and check as soon as start up kube-master - local master_name=$1 - if [[ ${IP_MODE} = "dual-stack" ]]; then - local dual_stack_support="$(docker exec ${master_name} cat /node-info 2>/dev/null | grep "dual-stack-support" | wc -l)" - if [[ ${dual_stack_support} -eq 0 ]]; then - echo "ERROR! DinD image (${DIND_IMAGE}) does not support dual-stack mode - aborting!" - dind::remove-images "${DIND_LABEL}" - exit 1 - fi - fi -} - -function dind::check-dns-service-type { - if [[ ${DNS_SERVICE} = "kube-dns" ]] && dind::kubeadm-version-at-least 1 13; then - echo >&2 "WARNING: for 1.13+, only coredns can be used as the DNS service" - DNS_SERVICE="coredns" - fi -} - -function dind::init { - local -a opts - dind::set-master-opts - local local_host master_name container_id - master_name="$(dind::master-name)" - local_host="$( dind::localhost )" - container_id=$(dind::run "${master_name}" 1 "${local_host}:$(dind::apiserver-port):${INTERNAL_APISERVER_PORT}" ${master_opts[@]+"${master_opts[@]}"}) - - dind::verify-image-compatibility ${master_name} - - # FIXME: I tried using custom tokens with 'kubeadm ex token create' but join failed with: - # 'failed to parse response as JWS object [square/go-jose: compact JWS format must have three parts]' - # So we just pick the line from 'kubeadm init' output - # Using a template file in the image (based on version) to build a kubeadm.conf file and to customize - # it based on CNI plugin, IP mode, and environment settings. User can add additional - # customizations to template and then rebuild the image used (build/build-local.sh). - local pod_subnet_disable="# " - # TODO: May want to specify each of the plugins that require --pod-network-cidr - if [[ ${CNI_PLUGIN} != "bridge" && ${CNI_PLUGIN} != "ptp" ]]; then - pod_subnet_disable="" - fi - local bind_address="0.0.0.0" - if [[ ${SERVICE_NET_MODE} = "ipv6" ]]; then - bind_address="::" - fi - dind::proxy "$master_name" - dind::custom-docker-opts "$master_name" - - # HACK: Indicating mode, so that wrapkubeadm will not set a cluster CIDR for kube-proxy - # in IPv6 (only) mode. 
- if [[ ${SERVICE_NET_MODE} = "ipv6" ]]; then - docker exec --privileged -i "$master_name" touch /v6-mode - fi - - feature_gates="{CoreDNS: false}" - if [[ ${DNS_SERVICE} == "coredns" ]]; then - feature_gates="{CoreDNS: true}" - fi - - kubeadm_version="$(dind::kubeadm-version)" - case "${kubeadm_version}" in - 1\.9\.* | 1\.10\.*) - template="1.10" - ;; - 1\.11\.*) - template="1.11" - ;; - 1\.12\.*) - template="1.12" - ;; - *) # Includes 1.13 master branch - # Will make a separate template if/when it becomes incompatible - template="1.13" - # CoreDNS can no longer be switched off - feature_gates="{}" - ;; - esac - dind::check-dns-service-type - - component_feature_gates="" - if [ "${FEATURE_GATES}" != "none" ]; then - component_feature_gates="feature-gates: \\\"${FEATURE_GATES}\\\"" - fi - - apiserver_extra_args="" - for e in $(set -o posix ; set | grep -E "^APISERVER_[a-z_]+=" | cut -d'=' -f 1); do - opt_name=$(echo ${e#APISERVER_} | sed 's/_/-/g') - apiserver_extra_args+=" ${opt_name}: \\\"$(eval echo \$$e)\\\"\\n" - done - - controller_manager_extra_args="" - for e in $(set -o posix ; set | grep -E "^CONTROLLER_MANAGER_[a-z_]+=" | cut -d'=' -f 1); do - opt_name=$(echo ${e#CONTROLLER_MANAGER_} | sed 's/_/-/g') - controller_manager_extra_args+=" ${opt_name}: \\\"$(eval echo \$$e)\\\"\\n" - done - - scheduler_extra_args="" - for e in $(set -o posix ; set | grep -E "^SCHEDULER_[a-z_]+=" | cut -d'=' -f 1); do - opt_name=$(echo ${e#SCHEDULER_} | sed 's/_/-/g') - scheduler_extra_args+=" ${opt_name}: \\\"$(eval echo \$$e)\\\"\\n" - done - - local mgmt_cidr=${mgmt_net_cidrs[0]} - if [[ ${IP_MODE} = "dual-stack" && ${SERVICE_NET_MODE} = "ipv6" ]]; then - mgmt_cidr=${mgmt_net_cidrs[1]} - fi - local master_ip=$( dind::make-ip-from-cidr ${mgmt_cidr} 2 ) - docker exec -i "$master_name" bash < /etc/kubeadm.conf -EOF - init_args=(--config /etc/kubeadm.conf) - # required when building from source - if [[ ${BUILD_KUBEADM} || ${BUILD_HYPERKUBE} ]]; then - docker exec "$master_name" mount --make-shared /k8s - fi - kubeadm_join_flags="$(dind::kubeadm "${container_id}" init "${init_args[@]}" --ignore-preflight-errors=all "$@" | grep '^ *kubeadm join' | sed 's/^ *kubeadm join //')" - dind::configure-kubectl - dind::start-port-forwarder -} - -function dind::create-node-container { - local reuse_volume next_node_index node_name - reuse_volume='' - if [[ ${1:-} = -r ]]; then - reuse_volume="-r" - shift - fi - # if there's just one node currently, it's master, thus we need to use - # kube-node-1 hostname, if there are two nodes, we should pick - # kube-node-2 and so on - next_node_index=${1:-$(docker ps -q --filter=label="${DIND_LABEL}" | wc -l | sed 's/^ *//g')} - local -a opts - if [[ ${BUILD_KUBEADM} || ${BUILD_HYPERKUBE} ]]; then - opts+=(-v "dind-k8s-binaries$(dind::cluster-suffix)":/k8s) - if [[ ${BUILD_KUBEADM} ]]; then - opts+=(-e KUBEADM_SOURCE=build://) - fi - if [[ ${BUILD_HYPERKUBE} ]]; then - opts+=(-e HYPERKUBE_SOURCE=build://) - fi - fi - node_name="$(dind::node-name ${next_node_index})" - dind::run ${reuse_volume} "$node_name" $((next_node_index + 1)) "${EXTRA_PORTS}" ${opts[@]+"${opts[@]}"} -} - -function dind::join { - local container_id="$1" - shift - dind::proxy "${container_id}" - dind::custom-docker-opts "${container_id}" - local -a join_opts=(--ignore-preflight-errors=all - --cri-socket="${CRI_SOCKET}") - dind::kubeadm "${container_id}" join "${join_opts[@]}" "$@" >/dev/null -} - -function dind::escape-e2e-name { - sed 's/[]\$*.^()[]/\\&/g; s/\s\+/\\s+/g' <<< "$1" | tr -d '\n' -} - -function 
dind::accelerate-kube-dns { - if [[ ${DNS_SERVICE} == "kube-dns" ]]; then - dind::step "Patching kube-dns deployment to make it start faster" - # Could do this on the host, too, but we don't want to require jq here - # TODO: do this in wrapkubeadm - docker exec "$(dind::master-name)" /bin/bash -c \ - "kubectl get deployment kube-dns -n kube-system -o json | jq '.spec.template.spec.containers[0].readinessProbe.initialDelaySeconds = 3|.spec.template.spec.containers[0].readinessProbe.periodSeconds = 3' | kubectl apply --force -f -" - fi -} - -function dind::component-ready { - local label="$1" - local out - if ! out="$("${kubectl}" --context "$(dind::context-name)" get pod -l "${label}" -n kube-system \ - -o jsonpath='{ .items[*].status.conditions[?(@.type == "Ready")].status }' 2>/dev/null)"; then - return 1 - fi - if ! grep -v False <<<"${out}" | grep -q True; then - return 1 - fi - return 0 -} - -function dind::kill-failed-pods { - local pods ctx - ctx="$(dind::context-name)" - # workaround for https://github.com/kubernetes/kubernetes/issues/36482 - if ! pods="$(kubectl --context "$ctx" get pod -n kube-system -o jsonpath='{ .items[?(@.status.phase == "Failed")].metadata.name }' 2>/dev/null)"; then - return - fi - for name in ${pods}; do - kubectl --context "$ctx" delete pod --now -n kube-system "${name}" >&/dev/null || true - done -} - -function dind::create-static-routes { - echo "Creating static routes for bridge/PTP plugin" - for ((i=0; i <= NUM_NODES; i++)); do - if [[ ${i} -eq 0 ]]; then - node="$(dind::master-name)" - else - node="$(dind::node-name $i)" - fi - for ((j=0; j <= NUM_NODES; j++)); do - if [[ ${i} -eq ${j} ]]; then - continue - fi - if [[ ${j} -eq 0 ]]; then - dest_node="$(dind::master-name)" - else - dest_node="$(dind::node-name $j)" - fi - id=$((${j}+1)) - if [[ ${IP_MODE} = "ipv4" || ${IP_MODE} = "dual-stack" ]]; then - # Assuming pod subnets will all be /24 - dest="${pod_prefixes[0]}${id}.0/24" - gw=`docker exec ${dest_node} ip addr show eth0 | grep -w inet | awk '{ print $2 }' | sed 's,/.*,,'` - docker exec "${node}" ip route add "${dest}" via "${gw}" - fi - if [[ ${IP_MODE} = "ipv6" || ${IP_MODE} = "dual-stack" ]]; then - local position=0 - if [[ ${IP_MODE} = "dual-stack" ]]; then - position=1 - fi - instance=$(printf "%02x" ${id}) - if [[ $((${pod_sizes[$position]} % 16)) -ne 0 ]]; then - instance+="00" # Move node ID to upper byte - fi - dest="${pod_prefixes[$position]}${instance}::/${pod_sizes[$position]}" - gw=`docker exec ${dest_node} ip addr show eth0 | grep -w inet6 | grep -i global | head -1 | awk '{ print $2 }' | sed 's,/.*,,'` - docker exec "${node}" ip route add "${dest}" via "${gw}" - fi - done - done -} - -# If we are allowing AAAA record use, then provide SNAT for IPv6 packets from -# node containers, and forward packets to bridge used for $(dind::net-name). -# This gives pods access to external IPv6 sites, when using IPv6 addresses. -function dind::setup_external_access_on_host { - if [[ ! ${DIND_ALLOW_AAAA_USE} ]]; then - return - fi - local main_if=`ip route | grep default | awk '{print $5}'` - dind::ip6tables-on-hostnet -t nat -A POSTROUTING -o $main_if -j MASQUERADE - if [[ ${IP_MODE} = "dual-stack" ]]; then - return - fi - local bridge_if=`ip route | grep ${NAT64_V4_SUBNET_PREFIX}.0.0 | awk '{print $3}'` - if [[ -n "$bridge_if" ]]; then - dind::ip6tables-on-hostnet -A FORWARD -i $bridge_if -j ACCEPT - else - echo "WARNING! 
No $(dind::net-name) bridge with NAT64 - unable to setup forwarding/SNAT" - fi -} - -# Remove ip6tables rules for SNAT and forwarding, if they exist. -function dind::remove_external_access_on_host { - if [[ ! ${DIND_ALLOW_AAAA_USE} ]]; then - return - fi - local have_rule - local main_if="$(ip route | grep default | awk '{print $5}')" - have_rule="$(dind::ip6tables-on-hostnet -S -t nat | grep "\-o $main_if" || true)" - if [[ -n "$have_rule" ]]; then - dind::ip6tables-on-hostnet -t nat -D POSTROUTING -o $main_if -j MASQUERADE - else - echo "Skipping delete of ip6tables rule for SNAT, as rule non-existent" - fi - - if [[ ${IP_MODE} = "dual-stack" ]]; then - return - fi - local bridge_if="$(ip route | grep ${NAT64_V4_SUBNET_PREFIX}.0.0 | awk '{print $3}')" - if [[ -n "$bridge_if" ]]; then - have_rule="$(dind::ip6tables-on-hostnet -S | grep "\-i $bridge_if" || true)" - if [[ -n "$have_rule" ]]; then - dind::ip6tables-on-hostnet -D FORWARD -i $bridge_if -j ACCEPT - else - echo "Skipping delete of ip6tables rule for forwarding, as rule non-existent" - fi - else - echo "Skipping delete of ip6tables rule for forwarding, as no bridge interface using NAT64" - fi -} - -function dind::ip6tables-on-hostnet { - local mod_path='/lib/modules' - docker run -v "${mod_path}:${mod_path}" --entrypoint /sbin/ip6tables --net=host --rm --privileged "${DIND_IMAGE}" "$@" -} - -function dind::wait-for-ready { - local app="kube-proxy" - if [[ ${CNI_PLUGIN} = "kube-router" ]]; then - app=kube-router - fi - dind::step "Waiting for ${app} and the nodes" - local app_ready - local nodes_ready - local n=3 - local ntries=200 - local ctx - ctx="$(dind::context-name)" - while true; do - dind::kill-failed-pods - if "${kubectl}" --context "$ctx" get nodes 2>/dev/null | grep -q NotReady; then - nodes_ready= - else - nodes_ready=y - fi - if dind::component-ready k8s-app=${app}; then - app_ready=y - else - app_ready= - fi - if [[ ${nodes_ready} && ${app_ready} ]]; then - if ((--n == 0)); then - echo "[done]" >&2 - break - fi - else - n=3 - fi - if ((--ntries == 0)); then - echo "Error waiting for ${app} and the nodes" >&2 - exit 1 - fi - echo -n "." >&2 - sleep 1 - done - - dind::step "Bringing up ${DNS_SERVICE} and kubernetes-dashboard" - # on Travis 'scale' sometimes fails with 'error: Scaling the resource failed with: etcdserver: request timed out; Current resource version 442' here - dind::retry "${kubectl}" --context "$ctx" scale deployment --replicas=1 -n kube-system ${DNS_SERVICE} - dind::retry "${kubectl}" --context "$ctx" scale deployment --replicas=1 -n kube-system kubernetes-dashboard - - ntries=200 - while ! dind::component-ready k8s-app=kube-dns || ! dind::component-ready app=kubernetes-dashboard; do - if ((--ntries == 0)); then - echo "Error bringing up ${DNS_SERVICE} and kubernetes-dashboard" >&2 - exit 1 - fi - echo -n "." >&2 - dind::kill-failed-pods - sleep 1 - done - echo "[done]" >&2 - - dind::retry "${kubectl}" --context "$ctx" get nodes >&2 - - local local_host - local_host="$( dind::localhost )" - dind::step "Access dashboard at:" "http://${local_host}:$(dind::apiserver-port)/api/v1/namespaces/kube-system/services/kubernetes-dashboard:/proxy" -} - -# dind::make-kube-router-yaml creates a temp file with contents of the configuration needed for the kube-router CNI -# plugin at a specific version, instead of using the publically available file, which uses the latest version. This -# allows us to control the version used. 
If/when updating, be sure to update the KUBE_ROUTER_VERSION env variable -# ensure the YAML contents below, reflect the configuration in: -# -# https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter-all-feature.yaml -# -# FUTURE: May be able to remove this, if/when kube-router "latest" is stable, and use the public YAML file instead. -function dind::make-kube-router-yaml { - tmp_yaml=$(mktemp /tmp/kube-router-yaml.XXXXXX) - cat >${tmp_yaml} <&2 "*** Failed to start node container ${n}" - exit 1 - else - node_containers+=(${container_id}) - dind::step "Node container started:" ${n} - fi - done - dind::fix-mounts - status=0 - local -a pids - for ((n=1; n <= NUM_NODES; n++)); do - ( - dind::step "Joining node:" ${n} - container_id="${node_containers[${n}-1]}" - if ! dind::join ${container_id} ${kubeadm_join_flags}; then - echo >&2 "*** Failed to start node container ${n}" - exit 1 - else - dind::step "Node joined:" ${n} - fi - )& - pids[${n}]=$! - done - if ((NUM_NODES > 0)); then - for pid in ${pids[*]}; do - wait ${pid} - done - else - # FIXME: this may fail depending on k8s/kubeadm version - # FIXME: check for taint & retry if it's there - "${kubectl}" --context "$ctx" taint nodes $(dind::master-name) node-role.kubernetes.io/master- || true - fi - case "${CNI_PLUGIN}" in - bridge | ptp) - dind::create-static-routes - dind::setup_external_access_on_host - ;; - flannel) - # without --validate=false this will fail on older k8s versions - dind::retry "${kubectl}" --context "$ctx" apply --validate=false -f "https://github.com/coreos/flannel/blob/master/Documentation/kube-flannel.yml?raw=true" - ;; - calico) - manifest_base=https://docs.projectcalico.org/${CALICO_VERSION:-v3.3}/getting-started/kubernetes/installation - dind::retry "${kubectl}" --context "$ctx" apply -f ${manifest_base}/hosted/etcd.yaml - if [ "${CALICO_VERSION:-v3.3}" != master ]; then - dind::retry "${kubectl}" --context "$ctx" apply -f ${manifest_base}/rbac.yaml - fi - dind::retry "${kubectl}" --context "$ctx" apply -f ${manifest_base}/hosted/calico.yaml - dind::retry "${kubectl}" --context "$ctx" apply -f ${manifest_base}/hosted/calicoctl.yaml - ;; - calico-kdd) - manifest_base=https://docs.projectcalico.org/${CALICO_VERSION:-v3.3}/getting-started/kubernetes/installation - dind::retry "${kubectl}" --context "$ctx" apply -f ${manifest_base}/hosted/rbac-kdd.yaml - dind::retry "${kubectl}" --context "$ctx" apply -f ${manifest_base}/hosted/kubernetes-datastore/calico-networking/1.7/calico.yaml - ;; - weave) - dind::retry "${kubectl}" --context "$ctx" apply -f "https://cloud.weave.works/k8s/net?k8s-version=$(${kubectl} --context "$ctx" version | base64 | tr -d '\n')" - ;; - kube-router) - kube_router_config="$( dind::make-kube-router-yaml )" - dind::retry "${kubectl}" --context "$ctx" apply -f ${kube_router_config} - rm "${kube_router_config}" - dind::retry "${kubectl}" --context "$ctx" -n kube-system delete ds kube-proxy - docker run --privileged --net=host k8s.gcr.io/kube-proxy-amd64:v1.10.2 kube-proxy --cleanup - ;; - *) - echo "Unsupported CNI plugin '${CNI_PLUGIN}'" >&2 - ;; - esac - dind::deploy-dashboard - dind::accelerate-kube-dns - if [[ (${CNI_PLUGIN} != "bridge" && ${CNI_PLUGIN} != "ptp") || ${SKIP_SNAPSHOT} ]]; then - # This is especially important in case of Calico - - # the cluster will not recover after snapshotting - # (at least not after restarting from the snapshot) - # if Calico installation is interrupted - dind::wait-for-ready - fi - dind::step "Cluster Info" 
- echo "Network Mode: ${IP_MODE}" - echo "Cluster context: $( dind::context-name )" - echo "Cluster ID: ${CLUSTER_ID}" - echo "Management CIDR(s): ${mgmt_net_cidrs[@]}" - echo "Service CIDR/mode: ${SERVICE_CIDR}/${SERVICE_NET_MODE}" - echo "Pod CIDR(s): ${pod_net_cidrs[@]}" -} - -function dind::fix-mounts { - local node_name - for ((n=0; n <= NUM_NODES; n++)); do - node_name="$(dind::master-name)" - if ((n > 0)); then - node_name="$(dind::node-name $n)" - fi - docker exec "${node_name}" mount --make-shared /run - if [[ ! ${using_linuxkit} ]]; then - docker exec "${node_name}" mount --make-shared /lib/modules/ - fi - # required when building from source - if [[ ${BUILD_KUBEADM} || ${BUILD_HYPERKUBE} ]]; then - docker exec "${node_name}" mount --make-shared /k8s - fi - # docker exec "${node_name}" mount --make-shared /sys/kernel/debug - done -} - -function dind::snapshot_container { - local container_name="$1" - # we must pass DIND_CRI here because in case of containerd - # a special care must be taken to stop the containers during - # the snapshot - docker exec -e DIND_CRI="${DIND_CRI}" -i ${container_name} \ - /usr/local/bin/snapshot prepare - # remove the hidden *plnk directories - docker diff ${container_name} | grep -v plnk | docker exec -i ${container_name} /usr/local/bin/snapshot save -} - -function dind::snapshot { - dind::step "Taking snapshot of the cluster" - dind::snapshot_container "$(dind::master-name)" - for ((n=1; n <= NUM_NODES; n++)); do - dind::snapshot_container "$(dind::node-name $n)" - done - dind::wait-for-ready -} - -restore_cmd=restore -function dind::restore_container { - local container_id="$1" - docker exec ${container_id} /usr/local/bin/snapshot "${restore_cmd}" -} - -function dind::restore { - local apiserver_port local_host pid pids - dind::down - dind::check-dns-service-type - dind::step "Restoring containers" - dind::set-master-opts - local_host="$( dind::localhost )" - apiserver_port="$( dind::apiserver-port )" - for ((n=0; n <= NUM_NODES; n++)); do - ( - if [[ n -eq 0 ]]; then - dind::step "Restoring master container" - dind::restore_container "$(dind::run -r "$(dind::master-name)" 1 "${local_host}:${apiserver_port}:${INTERNAL_APISERVER_PORT}" ${master_opts[@]+"${master_opts[@]}"})" - dind::verify-image-compatibility "$(dind::master-name)" - dind::step "Master container restored" - else - dind::step "Restoring node container:" ${n} - if ! container_id="$(dind::create-node-container -r ${n})"; then - echo >&2 "*** Failed to start node container ${n}" - exit 1 - else - dind::restore_container "${container_id}" - dind::step "Node container restored:" ${n} - fi - fi - )& - pids[${n}]=$! - done - for pid in ${pids[*]}; do - wait ${pid} - done - if [[ ${CNI_PLUGIN} = "bridge" || ${CNI_PLUGIN} = "ptp" ]]; then - dind::create-static-routes - dind::setup_external_access_on_host - fi - dind::fix-mounts - # Recheck kubectl config. 
It's possible that the cluster was started - # on this docker from different host - dind::configure-kubectl - dind::start-port-forwarder - dind::wait-for-ready -} - -function dind::down { - dind::remove-images "${DIND_LABEL}" - if [[ ${CNI_PLUGIN} = "bridge" || ${CNI_PLUGIN} = "ptp" ]]; then - dind::remove_external_access_on_host - elif [[ "${CNI_PLUGIN}" = "kube-router" ]]; then - if [[ ${COMMAND} = "down" || ${COMMAND} = "clean" ]]; then - # FUTURE: Updated pinned version, after verifying operation - docker run --privileged --net=host cloudnativelabs/kube-router:${KUBE_ROUTER_VERSION} --cleanup-config - fi - fi -} - -function dind::apiserver-port { - # APISERVER_PORT is explicitely set - if [ -n "${APISERVER_PORT:-}" ] - then - echo "$APISERVER_PORT" - return - fi - - # Get the port from the master - local master port - master="$(dind::master-name)" - # 8080/tcp -> 127.0.0.1:8082 => 8082 - port="$( docker port "$master" 2>/dev/null | awk -F: "/^${INTERNAL_APISERVER_PORT}/{ print \$NF }" )" - if [ -n "$port" ] - then - APISERVER_PORT="$port" - echo "$APISERVER_PORT" - return - fi - - # get a random free port - APISERVER_PORT=0 - echo "$APISERVER_PORT" -} - -function dind::master-name { - echo "kube-master$( dind::cluster-suffix )" -} - -function dind::node-name { - local nr="$1" - echo "kube-node-${nr}$( dind::cluster-suffix )" -} - -function dind::context-name { - echo "dind$( dind::cluster-suffix )" -} - -function dind::remove-volumes { - # docker 1.13+: docker volume ls -q -f label="${DIND_LABEL}" - local nameRE - nameRE="^kubeadm-dind-(sys|kube-master|kube-node-[0-9]+)$(dind::cluster-suffix)$" - docker volume ls -q | (grep -E "$nameRE" || true) | while read -r volume_id; do - dind::step "Removing volume:" "${volume_id}" - docker volume rm "${volume_id}" - done -} - -function dind::remove-images { - local which=$1 - docker ps -a -q --filter=label="${which}" | while read container_id; do - dind::step "Removing container:" "${container_id}" - docker rm -fv "${container_id}" - done -} - -function dind::remove-cluster { - cluster_name="dind$(dind::cluster-suffix)" - if ${kubectl} config get-clusters | grep -qE "^${cluster_name}$"; then - dind::step "Removing cluster from config:" "${cluster_name}" - ${kubectl} config delete-cluster ${cluster_name} 2>/dev/null || true - fi -} - -function dind::remove-context { - context_name="$(dind::context-name)" - if ${kubectl} config get-contexts | grep -qE "${context_name}\\s"; then - dind::step "Removing context from config:" "${context_name}" - ${kubectl} config delete-context ${context_name} 2>/dev/null || true - fi -} - -function dind::start-port-forwarder { - local fwdr port - fwdr="${DIND_PORT_FORWARDER:-}" - - [ -n "$fwdr" ] || return 0 - - [ -x "$fwdr" ] || { - echo "'${fwdr}' is not executable." >&2 - return 1 - } - - port="$( dind::apiserver-port )" - dind::step "+ Setting up port-forwarding for :${port}" - "$fwdr" "$port" -} - -function dind::check-for-snapshot { - if ! dind::volume-exists "kubeadm-dind-$(dind::master-name)"; then - return 1 - fi - for ((n=1; n <= NUM_NODES; n++)); do - if ! 
dind::volume-exists "kubeadm-dind-$(dind::node-name ${n})"; then - return 1 - fi - done -} - -function dind::do-run-e2e { - local parallel="${1:-}" - local focus="${2:-}" - local skip="${3:-}" - local host="$(dind::localhost)" - if [[ -z "$using_local_linuxdocker" ]]; then - host="127.0.0.1" - fi - dind::need-source - local kubeapi test_args term= - local -a e2e_volume_opts=() - kubeapi="http://${host}:$(dind::apiserver-port)" - test_args="--host=${kubeapi}" - if [[ ${focus} ]]; then - test_args="--ginkgo.focus=${focus} ${test_args}" - fi - if [[ ${skip} ]]; then - test_args="--ginkgo.skip=${skip} ${test_args}" - fi - if [[ ${E2E_REPORT_DIR} ]]; then - test_args="--report-dir=/report ${test_args}" - e2e_volume_opts=(-v "${E2E_REPORT_DIR}:/report") - fi - dind::make-for-linux n "cmd/kubectl test/e2e/e2e.test vendor/github.com/onsi/ginkgo/ginkgo" - dind::step "Running e2e tests with args:" "${test_args}" - dind::set-build-volume-args - if [ -t 1 ] ; then - term="-it" - test_args="--ginkgo.noColor --num-nodes=2 ${test_args}" - fi - docker run \ - --rm ${term} \ - --net=host \ - "${build_volume_args[@]}" \ - -e KUBERNETES_PROVIDER=dind \ - -e KUBE_MASTER_IP="${kubeapi}" \ - -e KUBE_MASTER=local \ - -e KUBERNETES_CONFORMANCE_TEST=y \ - -e GINKGO_PARALLEL=${parallel} \ - ${e2e_volume_opts[@]+"${e2e_volume_opts[@]}"} \ - -w /go/src/k8s.io/kubernetes \ - "${e2e_base_image}" \ - bash -c "cluster/kubectl.sh config set-cluster dind --server='${kubeapi}' --insecure-skip-tls-verify=true && - cluster/kubectl.sh config set-context dind --cluster=dind && - cluster/kubectl.sh config use-context dind && - go run hack/e2e.go -- --v 6 --test --check-version-skew=false --test_args='${test_args}'" -} - -function dind::clean { - dind::ensure-downloaded-kubectl - dind::down - dind::remove-images "dind-support$( dind::cluster-suffix )" - dind::remove-volumes - local net_name - net_name="$(dind::net-name)" - if docker network inspect "$net_name" >&/dev/null; then - docker network rm "$net_name" - fi - dind::remove-cluster - dind::remove-context -} - -function dind::copy-image { - local image="${2:-}" - local image_path="/tmp/save_${image//\//_}" - if [[ -f "${image_path}" ]]; then - rm -fr "${image_path}" - fi - docker save "${image}" -o "${image_path}" - docker ps -a -q --filter=label="${DIND_LABEL}" | while read container_id; do - cat "${image_path}" | docker exec -i "${container_id}" docker load - done - rm -fr "${image_path}" -} - -function dind::run-e2e { - local focus="${1:-}" - local skip="${2:-[Serial]}" - skip="$(dind::escape-e2e-name "${skip}")" - if [[ "$focus" ]]; then - focus="$(dind::escape-e2e-name "${focus}")" - else - focus="\[Conformance\]" - fi - local parallel=y - if [[ ${DIND_NO_PARALLEL_E2E} ]]; then - parallel= - fi - dind::do-run-e2e "${parallel}" "${focus}" "${skip}" -} - -function dind::run-e2e-serial { - local focus="${1:-}" - local skip="${2:-}" - skip="$(dind::escape-e2e-name "${skip}")" - dind::need-source - if [[ "$focus" ]]; then - focus="$(dind::escape-e2e-name "${focus}")" - else - focus="\[Serial\].*\[Conformance\]" - fi - dind::do-run-e2e n "${focus}" "${skip}" - # TBD: specify filter -} - -function dind::step { - local OPTS="" - if [ "$1" = "-n" ]; then - shift - OPTS+="-n" - fi - GREEN="$1" - shift - if [ -t 2 ] ; then - echo -e ${OPTS} "\x1B[97m* \x1B[92m${GREEN}\x1B[39m $*" 1>&2 - else - echo ${OPTS} "* ${GREEN} $*" 1>&2 - fi -} - -function dind::dump { - set +e - echo "*** Dumping cluster state ***" - for node in $(docker ps --format '{{.Names}}' --filter 
label="${DIND_LABEL}"); do - for service in kubelet.service dindnet.service criproxy.service dockershim.service; do - if docker exec "${node}" systemctl is-enabled "${service}" >&/dev/null; then - echo "@@@ service-${node}-${service}.log @@@" - docker exec "${node}" systemctl status "${service}" - docker exec "${node}" journalctl -xe -n all -u "${service}" - fi - done - echo "@@@ psaux-${node}.txt @@@" - docker exec "${node}" ps auxww - echo "@@@ dockerps-a-${node}.txt @@@" - docker exec "${node}" docker ps -a - echo "@@@ ip-a-${node}.txt @@@" - docker exec "${node}" ip a - echo "@@@ ip-r-${node}.txt @@@" - docker exec "${node}" ip r - done - local ctx master_name - master_name="$(dind::master-name)" - ctx="$(dind::context-name)" - docker exec "$master_name" kubectl get pods --all-namespaces \ - -o go-template='{{range $x := .items}}{{range $x.spec.containers}}{{$x.spec.nodeName}}{{" "}}{{$x.metadata.namespace}}{{" "}}{{$x.metadata.name}}{{" "}}{{.name}}{{"\n"}}{{end}}{{end}}' | - while read node ns pod container; do - echo "@@@ pod-${node}-${ns}-${pod}--${container}.log @@@" - docker exec "$master_name" kubectl logs -n "${ns}" -c "${container}" "${pod}" - done - echo "@@@ kubectl-all.txt @@@" - docker exec "$master_name" kubectl get all --all-namespaces -o wide - echo "@@@ describe-all.txt @@@" - docker exec "$master_name" kubectl describe all --all-namespaces - echo "@@@ nodes.txt @@@" - docker exec "$master_name" kubectl get nodes -o wide -} - -function dind::dump64 { - echo "%%% start-base64 %%%" - dind::dump | docker exec -i "$(dind::master-name)" /bin/sh -c "lzma | base64 -w 100" - echo "%%% end-base64 %%%" -} - -function dind::split-dump { - mkdir -p cluster-dump - cd cluster-dump - awk '!/^@@@ .* @@@$/{print >out}; /^@@@ .* @@@$/{out=$2}' out=/dev/null - ls -l -} - -function dind::split-dump64 { - decode_opt=-d - if base64 --help | grep -q '^ *-D'; then - # Mac OS X - decode_opt=-D - fi - sed -n '/^%%% start-base64 %%%$/,/^%%% end-base64 %%%$/p' | - sed '1d;$d' | - base64 "${decode_opt}" | - lzma -dc | - dind::split-dump -} - -function dind::proxy { - local container_id="$1" - if [[ ${DIND_CA_CERT_URL} ]] ; then - dind::step "+ Adding certificate on ${container_id}" - docker exec ${container_id} /bin/sh -c "cd /usr/local/share/ca-certificates; curl -sSO ${DIND_CA_CERT_URL}" - docker exec ${container_id} update-ca-certificates - fi - if [[ "${DIND_PROPAGATE_HTTP_PROXY}" || "${DIND_HTTP_PROXY}" || "${DIND_HTTPS_PROXY}" || "${DIND_NO_PROXY}" ]]; then - dind::step "+ Setting *_PROXY for docker service on ${container_id}" - local proxy_env="[Service]"$'\n'"Environment=" - if [[ "${DIND_PROPAGATE_HTTP_PROXY}" ]]; then - # take *_PROXY values from container environment - proxy_env+=$(docker exec ${container_id} env | grep -i _proxy | awk '{ print "\""$0"\""}' | xargs -d'\n') - else - if [[ "${DIND_HTTP_PROXY}" ]] ; then proxy_env+="\"HTTP_PROXY=${DIND_HTTP_PROXY}\" "; fi - if [[ "${DIND_HTTPS_PROXY}" ]] ; then proxy_env+="\"HTTPS_PROXY=${DIND_HTTPS_PROXY}\" "; fi - if [[ "${DIND_NO_PROXY}" ]] ; then proxy_env+="\"NO_PROXY=${DIND_NO_PROXY}\" "; fi - fi - docker exec -i ${container_id} /bin/sh -c "cat > /etc/systemd/system/docker.service.d/30-proxy.conf" <<< "${proxy_env}" - docker exec ${container_id} systemctl daemon-reload - docker exec ${container_id} systemctl restart docker - fi -} - -function dind::custom-docker-opts { - local container_id="$1" - local -a jq=() - local got_changes="" - if [[ ! 
-f ${DIND_DAEMON_JSON_FILE} ]] ; then - jq[0]="{}" - else - jq+=("$(cat ${DIND_DAEMON_JSON_FILE})") - if [[ ${DIND_DAEMON_JSON_FILE} != "/etc/docker/daemon.json" ]]; then - got_changes=1 - fi - fi - if [[ ${DIND_REGISTRY_MIRROR} ]] ; then - dind::step "+ Setting up registry mirror on ${container_id}" - jq+=("{\"registry-mirrors\": [\"${DIND_REGISTRY_MIRROR}\"]}") - got_changes=1 - fi - if [[ ${DIND_INSECURE_REGISTRIES} ]] ; then - dind::step "+ Setting up insecure-registries on ${container_id}" - jq+=("{\"insecure-registries\": ${DIND_INSECURE_REGISTRIES}}") - got_changes=1 - fi - if [[ ${got_changes} ]] ; then - local json=$(IFS="+"; echo "${jq[*]}") - docker exec -i ${container_id} /bin/sh -c "mkdir -p /etc/docker && jq -n '${json}' > /etc/docker/daemon.json" - docker exec ${container_id} systemctl daemon-reload - docker exec ${container_id} systemctl restart docker - fi -} - -COMMAND="${1:-}" - -case ${COMMAND} in - up) - if [[ ! ( ${DIND_IMAGE} =~ local ) && ! ${DIND_SKIP_PULL:-} ]]; then - dind::step "Making sure DIND image is up to date" - docker pull "${DIND_IMAGE}" >&2 - fi - - dind::prepare-sys-mounts - dind::ensure-kubectl - if [[ ${SKIP_SNAPSHOT} ]]; then - force_make_binaries=y dind::up - elif ! dind::check-for-snapshot; then - force_make_binaries=y dind::up - dind::snapshot - else - dind::restore - fi - ;; - reup) - dind::prepare-sys-mounts - dind::ensure-kubectl - if [[ ${SKIP_SNAPSHOT} ]]; then - force_make_binaries=y dind::up - elif ! dind::check-for-snapshot; then - force_make_binaries=y dind::up - dind::snapshot - else - force_make_binaries=y - restore_cmd=update_and_restore - dind::restore - fi - ;; - down) - dind::down - ;; - init) - shift - dind::prepare-sys-mounts - dind::ensure-kubectl - dind::init "$@" - ;; - join) - shift - dind::prepare-sys-mounts - dind::ensure-kubectl - dind::join "$(dind::create-node-container)" "$@" - ;; - # bare) - # shift - # dind::bare "$@" - # ;; - snapshot) - shift - dind::snapshot - ;; - restore) - shift - dind::restore - ;; - clean) - dind::clean - ;; - copy-image) - dind::copy-image "$@" - ;; - e2e) - shift - dind::run-e2e "$@" - ;; - e2e-serial) - shift - dind::run-e2e-serial "$@" - ;; - dump) - dind::dump - ;; - dump64) - dind::dump64 - ;; - split-dump) - dind::split-dump - ;; - split-dump64) - dind::split-dump64 - ;; - apiserver-port) - dind::apiserver-port - ;; - *) - echo "usage:" >&2 - echo " $0 up" >&2 - echo " $0 reup" >&2 - echo " $0 down" >&2 - echo " $0 init kubeadm-args..." >&2 - echo " $0 join kubeadm-args..." 
>&2 - # echo " $0 bare container_name [docker_options...]" - echo " $0 clean" - echo " $0 copy-image [image_name]" >&2 - echo " $0 e2e [test-name-substring]" >&2 - echo " $0 e2e-serial [test-name-substring]" >&2 - echo " $0 dump" >&2 - echo " $0 dump64" >&2 - echo " $0 split-dump" >&2 - echo " $0 split-dump64" >&2 - exit 1 - ;; -esac diff --git a/pkg/admission/admission_controller.go b/pkg/admission/admission_controller.go index 65c7c4aa40..2278b8e4ba 100644 --- a/pkg/admission/admission_controller.go +++ b/pkg/admission/admission_controller.go @@ -36,8 +36,6 @@ import ( const ( AdmitJobPath = "/jobs" MutateJobPath = "/mutating-jobs" - PVCInputName = "volcano.sh/job-input" - PVCOutputName = "volcano.sh/job-output" ) type AdmitFunc func(v1beta1.AdmissionReview) *v1beta1.AdmissionResponse diff --git a/pkg/controllers/garbagecollector/garbagecollector.go b/pkg/controllers/garbagecollector/garbagecollector.go index 25d3dc242f..0da9cbb763 100644 --- a/pkg/controllers/garbagecollector/garbagecollector.go +++ b/pkg/controllers/garbagecollector/garbagecollector.go @@ -250,7 +250,7 @@ func isJobFinished(job *v1alpha1.Job) bool { func getFinishAndExpireTime(j *v1alpha1.Job) (*time.Time, *time.Time, error) { if !needsCleanup(j) { - return nil, nil, fmt.Errorf("Job %s/%s should not be cleaned up", j.Namespace, j.Name) + return nil, nil, fmt.Errorf("job %s/%s should not be cleaned up", j.Namespace, j.Name) } finishAt, err := jobFinishTime(j) if err != nil { diff --git a/pkg/controllers/job/job_controller_util.go b/pkg/controllers/job/job_controller_util.go index 22c7458923..aaf8efeef2 100644 --- a/pkg/controllers/job/job_controller_util.go +++ b/pkg/controllers/job/job_controller_util.go @@ -31,18 +31,6 @@ import ( vkjobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" ) -func eventKey(obj interface{}) interface{} { - req, ok := obj.(apis.Request) - if !ok { - return obj - } - - return apis.Request{ - Namespace: req.Namespace, - JobName: req.JobName, - } -} - func MakePodName(jobName string, taskName string, index int) string { return fmt.Sprintf(vkjobhelpers.PodNameFmt, jobName, taskName, index) } diff --git a/pkg/controllers/job/plugins/env/types.go b/pkg/controllers/job/plugins/env/types.go index 5f751b5535..296f651561 100644 --- a/pkg/controllers/job/plugins/env/types.go +++ b/pkg/controllers/job/plugins/env/types.go @@ -17,9 +17,6 @@ limitations under the License. 
package env const ( - // ConfigMapTaskHostFmt key in config map - ConfigMapTaskHostFmt = "%s.host" - // ConfigMapMountPath mount path ConfigMapMountPath = "/etc/volcano" diff --git a/pkg/controllers/job/state/inqueue.go b/pkg/controllers/job/state/inqueue.go index 981aeb40e5..1cd55fcba1 100644 --- a/pkg/controllers/job/state/inqueue.go +++ b/pkg/controllers/job/state/inqueue.go @@ -65,5 +65,4 @@ func (ps *inqueueState) Execute(action vkv1.Action) error { return false }) } - return nil } From b1b6023faf3001fc2ca86fb89aca0f92cacc6c48 Mon Sep 17 00:00:00 2001 From: TommyLike Date: Wed, 19 Jun 2019 16:43:04 +0800 Subject: [PATCH 08/26] Use submodule to integrate helm chart --- .gitmodules | 3 + README.md | 6 +- hack/run-e2e-kind.sh | 2 +- installer/chart/Chart.yaml | 4 - installer/chart/config/kube-batch-ci.conf | 11 -- installer/chart/config/kube-batch.conf | 11 -- .../chart/templates/admission-config.yaml | 50 ----- installer/chart/templates/admission.yaml | 134 ------------- .../chart/templates/batch_v1alpha1_job.yaml | 187 ------------------ .../chart/templates/bus_v1alpha1_command.yaml | 46 ----- installer/chart/templates/controllers.yaml | 96 --------- installer/chart/templates/default-queue.yaml | 6 - installer/chart/templates/scheduler.yaml | 119 ----------- .../scheduling_v1alpha1_podgroup.yaml | 41 ---- .../templates/scheduling_v1alpha1_queue.yaml | 37 ---- installer/chart/values.yaml | 8 - installer/helm | 1 + 17 files changed, 8 insertions(+), 754 deletions(-) create mode 100644 .gitmodules delete mode 100644 installer/chart/Chart.yaml delete mode 100644 installer/chart/config/kube-batch-ci.conf delete mode 100644 installer/chart/config/kube-batch.conf delete mode 100644 installer/chart/templates/admission-config.yaml delete mode 100644 installer/chart/templates/admission.yaml delete mode 100644 installer/chart/templates/batch_v1alpha1_job.yaml delete mode 100644 installer/chart/templates/bus_v1alpha1_command.yaml delete mode 100644 installer/chart/templates/controllers.yaml delete mode 100644 installer/chart/templates/default-queue.yaml delete mode 100644 installer/chart/templates/scheduler.yaml delete mode 100644 installer/chart/templates/scheduling_v1alpha1_podgroup.yaml delete mode 100644 installer/chart/templates/scheduling_v1alpha1_queue.yaml delete mode 100644 installer/chart/values.yaml create mode 160000 installer/helm diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..207611549e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "installer/helm"] + path = installer/helm + url = https://github.com/volcano-sh/charts.git diff --git a/README.md b/README.md index 6417c26465..4940369d5f 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ First of all, clone the repo to your local path: ``` # mkdir -p $GOPATH/src/volcano.sh/ # cd $GOPATH/src/volcano.sh/ -# git clone https://github.com/volcano-sh/volcano.git +# git clone --recursive https://github.com/volcano-sh/volcano.git ``` ### 1. Volcano Image @@ -92,10 +92,10 @@ try command ```kind load docker-image : ``` for each of the ima Secondly, install helm chart. 
``` -helm install installer/chart --namespace --name +helm install installer/helm/chart/volcano --namespace --name For eg : -helm install installer/chart --namespace volcano-trial --name volcano-trial +helm install installer/helm/chart/volcano --namespace volcano-trial --name volcano-trial ``` diff --git a/hack/run-e2e-kind.sh b/hack/run-e2e-kind.sh index cbcd9e198f..27dfdc5e59 100755 --- a/hack/run-e2e-kind.sh +++ b/hack/run-e2e-kind.sh @@ -63,7 +63,7 @@ function install-volcano { kind load docker-image ${MPI_EXAMPLE_IMAGE} ${CLUSTER_CONTEXT} echo "Install volcano chart" - helm install installer/chart --namespace kube-system --name ${CLUSTER_NAME} --kubeconfig ${KUBECONFIG} --set basic.image_tag_version=${TAG} --set basic.scheduler_config_file=kube-batch-ci.conf --wait + helm install installer/helm/chart/volcano --namespace kube-system --name ${CLUSTER_NAME} --kubeconfig ${KUBECONFIG} --set basic.image_tag_version=${TAG} --set basic.scheduler_config_file=kube-batch-ci.conf --wait } function uninstall-volcano { diff --git a/installer/chart/Chart.yaml b/installer/chart/Chart.yaml deleted file mode 100644 index fbc85a71a2..0000000000 --- a/installer/chart/Chart.yaml +++ /dev/null @@ -1,4 +0,0 @@ -name: volcano -version: 0.0.1 -description: volcano -apiVersion: v1 diff --git a/installer/chart/config/kube-batch-ci.conf b/installer/chart/config/kube-batch-ci.conf deleted file mode 100644 index 5a6c324b30..0000000000 --- a/installer/chart/config/kube-batch-ci.conf +++ /dev/null @@ -1,11 +0,0 @@ -actions: "enqueue, reclaim, allocate, backfill, preempt" -tiers: -- plugins: - - name: priority - - name: gang - - name: conformance -- plugins: - - name: drf - - name: predicates - - name: proportion - - name: nodeorder diff --git a/installer/chart/config/kube-batch.conf b/installer/chart/config/kube-batch.conf deleted file mode 100644 index 14fa3072a2..0000000000 --- a/installer/chart/config/kube-batch.conf +++ /dev/null @@ -1,11 +0,0 @@ -actions: "enqueue, allocate, backfill" -tiers: -- plugins: - - name: priority - - name: gang - - name: conformance -- plugins: - - name: drf - - name: predicates - - name: proportion - - name: nodeorder diff --git a/installer/chart/templates/admission-config.yaml b/installer/chart/templates/admission-config.yaml deleted file mode 100644 index 7ece1449d7..0000000000 --- a/installer/chart/templates/admission-config.yaml +++ /dev/null @@ -1,50 +0,0 @@ -apiVersion: admissionregistration.k8s.io/v1beta1 -kind: ValidatingWebhookConfiguration -metadata: - name: {{ .Release.Name }}-validate-job - annotations: - "helm.sh/hook": pre-install,pre-upgrade,post-delete -webhooks: - - clientConfig: - service: - name: {{ .Release.Name }}-admission-service - namespace: {{ .Release.Namespace }} - path: /jobs - failurePolicy: Ignore - name: validatejob.volcano.sh - namespaceSelector: {} - rules: - - apiGroups: - - "batch.volcano.sh" - apiVersions: - - "v1alpha1" - operations: - - CREATE - - UPDATE - resources: - - jobs ---- -apiVersion: admissionregistration.k8s.io/v1beta1 -kind: MutatingWebhookConfiguration -metadata: - name: {{ .Release.Name }}-mutate-job - annotations: - "helm.sh/hook": pre-install,pre-upgrade,post-delete -webhooks: - - clientConfig: - service: - name: {{ .Release.Name }}-admission-service - namespace: {{ .Release.Namespace }} - path: /mutating-jobs - failurePolicy: Ignore - name: mutatejob.volcano.sh - namespaceSelector: {} - rules: - - apiGroups: - - "batch.volcano.sh" - apiVersions: - - "v1alpha1" - operations: - - CREATE - resources: - - jobs diff --git 
a/installer/chart/templates/admission.yaml b/installer/chart/templates/admission.yaml deleted file mode 100644 index 436b6b34d1..0000000000 --- a/installer/chart/templates/admission.yaml +++ /dev/null @@ -1,134 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ .Release.Name }}-admission - namespace: {{ .Release.Namespace }} ---- -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: {{ .Release.Name }}-admission - namespace: {{ .Release.Namespace }} -rules: - - apiGroups: [""] - resources: ["configmaps"] - verbs: ["get", "list", "watch"] - - apiGroups: ["admissionregistration.k8s.io"] - resources: ["mutatingwebhookconfigurations"] - verbs: ["get", "list", "watch", "patch"] - - apiGroups: ["admissionregistration.k8s.io"] - resources: ["validatingwebhookconfigurations"] - verbs: ["get", "list", "watch", "patch"] - # Rules below is used generate admission service secret - - apiGroups: ["certificates.k8s.io"] - resources: ["certificatesigningrequests"] - verbs: ["get", "list", "create", "delete"] - - apiGroups: ["certificates.k8s.io"] - resources: ["certificatesigningrequests/approval"] - verbs: ["create", "update"] - - apiGroups: [""] - resources: ["secrets"] - verbs: ["create", "get", "patch"] - - apiGroups: ["scheduling.incubator.k8s.io"] - resources: ["queues"] - verbs: ["get", "list"] - ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: {{ .Release.Name }}-admission-role - namespace: {{ .Release.Namespace }} -subjects: - - kind: ServiceAccount - name: {{ .Release.Name }}-admission - namespace: {{ .Release.Namespace }} -roleRef: - kind: ClusterRole - name: {{ .Release.Name }}-admission - apiGroup: rbac.authorization.k8s.io - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: volcano-admission - name: {{ .Release.Name }}-admission - namespace: {{ .Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: volcano-admission - template: - metadata: - labels: - app: volcano-admission - spec: - serviceAccount: {{ .Release.Name }}-admission - {{ if .Values.basic.image_pull_secret }} - imagePullSecrets: - - name: {{ .Values.basic.image_pull_secret }} - {{ end }} - containers: - - args: - - --tls-cert-file=/admission.local.config/certificates/tls.crt - - --tls-private-key-file=/admission.local.config/certificates/tls.key - - --ca-cert-file=/admission.local.config/certificates/ca.crt - - --mutate-webhook-config-name={{ .Release.Name }}-mutate-job - - --validate-webhook-config-name={{ .Release.Name }}-validate-job - - --alsologtostderr - - --port=443 - - -v=4 - - 2>&1 - image: {{.Values.basic.admission_image_name}}:{{.Values.basic.image_tag_version}} - imagePullPolicy: IfNotPresent - name: admission - volumeMounts: - - mountPath: /admission.local.config/certificates - name: admission-certs - readOnly: true - volumes: - - name: admission-certs - secret: - defaultMode: 420 - secretName: {{.Values.basic.admission_secret_name}} - ---- -apiVersion: v1 -kind: Service -metadata: - labels: - app: volcano-admission - name: {{ .Release.Name }}-admission-service - namespace: {{ .Release.Namespace }} -spec: - ports: - - port: 443 - protocol: TCP - targetPort: 443 - selector: - app: volcano-admission - sessionAffinity: None - ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ .Release.Name }}-admission-init - namespace: {{ .Release.Namespace }} - labels: - app: volcano-admission-init -spec: - backoffLimit: 3 - template: - spec: - serviceAccountName: {{ .Release.Name }}-admission - 
restartPolicy: Never - containers: - - name: main - image: {{.Values.basic.admission_image_name}}:{{.Values.basic.image_tag_version}} - imagePullPolicy: IfNotPresent - command: ["./gen-admission-secret.sh", "--service", "{{ .Release.Name }}-admission-service", "--namespace", - "{{ .Release.Namespace }}", "--secret", "{{.Values.basic.admission_secret_name}}"] diff --git a/installer/chart/templates/batch_v1alpha1_job.yaml b/installer/chart/templates/batch_v1alpha1_job.yaml deleted file mode 100644 index eb54dca40b..0000000000 --- a/installer/chart/templates/batch_v1alpha1_job.yaml +++ /dev/null @@ -1,187 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: jobs.batch.volcano.sh - annotations: - "helm.sh/hook": crd-install -spec: - group: batch.volcano.sh - names: - kind: Job - plural: jobs - shortNames: - - vkjob - - vj - scope: Namespaced - validation: - openAPIV3Schema: - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' - type: string - kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - metadata: - type: object - spec: - description: Specification of the desired behavior of a cron job, including - the minAvailable - properties: - volumes: - description: The volumes for Job - items: - properties: - volumeClaim: - description: VolumeClaim defines the PVC used by the VolumeMount. - type: object - mountPath: - description: Path within the container at which the volume should be mounted. - Must not contain ':'. - type: string - volumeClaimName: - description: The name of the volume claim. - type: object - required: - - mountPath - type: array - minAvailable: - description: The minimal available pods to run for this Job - format: int32 - type: integer - policies: - description: Specifies the default lifecycle of tasks - items: - properties: - action: - description: The action that will be taken to the PodGroup according - to Event. One of "Restart", "None". Default to None. - type: string - event: - description: The Event recorded by scheduler; the controller takes - actions according to this Event. - type: string - timeout: - description: Timeout is the grace period for controller to take - actions. Default to nil (take action immediately). - type: object - type: object - type: array - schedulerName: - description: SchedulerName is the default value of `tasks.template.spec.schedulerName`. - type: string - plugins: - description: Enabled task plugins when creating job. - type: object - additionalProperties: - type: array - tasks: - description: Tasks specifies the task specification of Job - items: - properties: - name: - description: Name specifies the name of tasks - type: string - policies: - description: Specifies the lifecycle of task - items: - properties: - action: - description: The action that will be taken to the PodGroup - according to Event. One of "Restart", "None". Default - to None. 
- type: string - event: - description: The Event recorded by scheduler; the controller - takes actions according to this Event. - type: string - timeout: - description: Timeout is the grace period for controller - to take actions. Default to nil (take action immediately). - type: object - type: object - type: array - replicas: - description: Replicas specifies the replicas of this TaskSpec - in Job - format: int32 - type: integer - template: - description: Specifies the pod that will be created for this TaskSpec - when executing a Job - type: object - type: object - type: array - queue: - description: The name of the queue on which job should been created - type: string - maxRetry: - description: The limit for retrying submiting job, default is 3 - format: int32 - type: integer - type: object - status: - description: Current status of Job - properties: - Succeeded: - description: The number of pods which reached phase Succeeded. - format: int32 - type: integer - failed: - description: The number of pods which reached phase Failed. - format: int32 - type: integer - minAvailable: - description: The minimal available pods to run for this Job - format: int32 - type: integer - pending: - description: The number of pending pods. - format: int32 - type: integer - running: - description: The number of running pods. - format: int32 - type: integer - version: - description: Job's current version - format: int32 - type: integer - retryCount: - description: The number that volcano retried to submit the job. - format: int32 - type: integer - ControlledResources: - description: All of the resources that are controlled by this job. - type: object - additionalProperties: - type: string - state: - description: Current state of Job. - properties: - message: - description: Human-readable message indicating details about last - transition. - type: string - phase: - description: The phase of Job - type: string - reason: - description: Unique, one-word, CamelCase reason for the condition's - last transition. - type: string - type: object - type: object - version: v1alpha1 - subresources: - status: {} -status: - acceptedNames: - kind: "" - plural: "" - conditions: [] - storedVersions: [] diff --git a/installer/chart/templates/bus_v1alpha1_command.yaml b/installer/chart/templates/bus_v1alpha1_command.yaml deleted file mode 100644 index 1cf467052b..0000000000 --- a/installer/chart/templates/bus_v1alpha1_command.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: commands.bus.volcano.sh - annotations: - "helm.sh/hook": crd-install -spec: - group: bus.volcano.sh - names: - kind: Command - plural: commands - scope: Namespaced - validation: - openAPIV3Schema: - properties: - action: - description: Action defines the action that will be took to the target object. - type: string - apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' - type: string - kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' - type: string - message: - description: Human-readable message indicating details of this command. - type: string - metadata: - type: object - reason: - description: Unique, one-word, CamelCase reason for this command. - type: string - target: - description: TargetObject defines the target object of this command. - type: object - version: v1alpha1 -status: - acceptedNames: - kind: "" - plural: "" - conditions: [] - storedVersions: [] diff --git a/installer/chart/templates/controllers.yaml b/installer/chart/templates/controllers.yaml deleted file mode 100644 index 6ce0743edc..0000000000 --- a/installer/chart/templates/controllers.yaml +++ /dev/null @@ -1,96 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ .Release.Name }}-controllers - namespace: {{ .Release.Namespace }} - ---- -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: {{ .Release.Name }}-controllers - namespace: {{ .Release.Namespace }} -rules: - - apiGroups: ["apiextensions.k8s.io"] - resources: ["customresourcedefinitions"] - verbs: ["create", "get", "list", "watch", "delete"] - - apiGroups: ["batch"] - resources: ["jobs"] - verbs: ["create", "get", "list", "watch", "delete", "update"] - - apiGroups: ["batch.volcano.sh"] - resources: ["jobs"] - verbs: ["get", "list", "watch", "update", "delete"] - - apiGroups: ["batch.volcano.sh"] - resources: ["jobs/status"] - verbs: ["update", "patch"] - - apiGroups: ["bus.volcano.sh"] - resources: ["commands"] - verbs: ["get", "list", "watch", "delete"] - - apiGroups: [""] - resources: ["events"] - verbs: ["create", "list", "watch", "update", "patch"] - - apiGroups: [""] - resources: ["pods"] - verbs: ["create", "get", "list", "watch", "update", "bind", "delete"] - - apiGroups: [""] - resources: ["persistentvolumeclaims"] - verbs: ["get", "list", "watch", "create"] - - apiGroups: [""] - resources: ["services"] - verbs: ["get", "list", "watch", "create", "delete"] - - apiGroups: [""] - resources: ["configmaps"] - verbs: ["get", "list", "watch", "create", "delete", "update"] - - apiGroups: ["scheduling.incubator.k8s.io"] - resources: ["podgroups", "queues", "queues/status"] - verbs: ["get", "list", "watch", "create", "delete", "update"] - - apiGroups: ["scheduling.k8s.io"] - resources: ["priorityclasses"] - verbs: ["get", "list", "watch", "create", "delete"] - ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: {{ .Release.Name }}-controllers-role - namespace: {{ .Release.Namespace }} -subjects: - - kind: ServiceAccount - name: {{ .Release.Name }}-controllers - namespace: {{ .Release.Namespace }} -roleRef: - kind: ClusterRole - name: {{ .Release.Name }}-controllers - apiGroup: rbac.authorization.k8s.io - ---- -kind: Deployment -apiVersion: apps/v1 -metadata: - name: {{ .Release.Name }}-controllers - namespace: {{ .Release.Namespace }} - labels: - app: volcano-controller -spec: - replicas: 1 - selector: - matchLabels: - app: volcano-controller - template: - metadata: - labels: - app: volcano-controller - spec: - serviceAccount: {{ .Release.Name }}-controllers - {{ if .Values.basic.image_pull_secret }} - imagePullSecrets: - - name: {{ .Values.basic.image_pull_secret }} - {{ end }} - containers: - - name: {{ .Release.Name }}-controllers - image: {{.Values.basic.controller_image_name}}:{{.Values.basic.image_tag_version}} - args: - - --alsologtostderr - - -v=4 - - 2>&1 - imagePullPolicy: "IfNotPresent" diff --git 
a/installer/chart/templates/default-queue.yaml b/installer/chart/templates/default-queue.yaml deleted file mode 100644 index 3aa233f5db..0000000000 --- a/installer/chart/templates/default-queue.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: scheduling.incubator.k8s.io/v1alpha1 -kind: Queue -metadata: - name: default -spec: - weight: 1 diff --git a/installer/chart/templates/scheduler.yaml b/installer/chart/templates/scheduler.yaml deleted file mode 100644 index 9617d431ae..0000000000 --- a/installer/chart/templates/scheduler.yaml +++ /dev/null @@ -1,119 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Release.Name }}-scheduler-configmap -data: - {{- (.Files.Glob "config/*").AsConfig | nindent 2 }} ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ .Release.Name }}-scheduler - namespace: {{ .Release.Namespace }} ---- -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: {{ .Release.Name }}-scheduler - namespace: {{ .Release.Namespace }} -rules: - - apiGroups: ["apiextensions.k8s.io"] - resources: ["customresourcedefinitions"] - verbs: ["create", "get", "list", "watch", "delete"] - - apiGroups: ["batch.volcano.sh"] - resources: ["jobs"] - verbs: ["get", "list", "watch", "update", "delete"] - - apiGroups: ["batch.volcano.sh"] - resources: ["jobs/status"] - verbs: ["update", "patch"] - - apiGroups: [""] - resources: ["events"] - verbs: ["create", "list", "watch", "update", "patch"] - - apiGroups: [""] - resources: ["pods", "pods/status"] - verbs: ["create", "get", "list", "watch", "update", "bind", "updateStatus", "delete"] - - apiGroups: [""] - resources: ["pods/binding"] - verbs: ["create"] - - apiGroups: [""] - resources: ["persistentvolumeclaims"] - verbs: ["list", "watch"] - - apiGroups: [""] - resources: ["persistentvolumes"] - verbs: ["list", "watch"] - - apiGroups: ["scheduling.incubator.k8s.io"] - resources: ["podgroups"] - verbs: ["list", "watch", "update"] - - apiGroups: [""] - resources: ["namespaces"] - verbs: ["list", "watch"] - - apiGroups: ["storage.k8s.io"] - resources: ["storageclasses"] - verbs: ["list", "watch"] - - apiGroups: [""] - resources: ["nodes"] - verbs: ["list", "watch"] - - apiGroups: ["policy"] - resources: ["poddisruptionbudgets"] - verbs: ["list", "watch"] - - apiGroups: ["scheduling.incubator.k8s.io"] - resources: ["queues"] - verbs: ["get", "list", "watch", "create", "delete"] - - apiGroups: ["scheduling.k8s.io"] - resources: ["priorityclasses"] - verbs: ["get", "list", "watch"] - ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: {{ .Release.Name }}-scheduler-role - namespace: {{ .Release.Namespace }} -subjects: - - kind: ServiceAccount - name: {{ .Release.Name }}-scheduler - namespace: {{ .Release.Namespace }} -roleRef: - kind: ClusterRole - name: {{ .Release.Name }}-scheduler - apiGroup: rbac.authorization.k8s.io - ---- -kind: Deployment -apiVersion: apps/v1 -metadata: - name: {{ .Release.Name }}-scheduler - namespace: {{ .Release.Namespace }} - labels: - app: volcano-scheduler -spec: - replicas: 1 - selector: - matchLabels: - app: volcano-scheduler - template: - metadata: - labels: - app: volcano-scheduler - spec: - serviceAccount: {{ .Release.Name }}-scheduler - {{ if .Values.basic.image_pull_secret }} - imagePullSecrets: - - name: {{ .Values.basic.image_pull_secret }} - {{ end }} - containers: - - name: {{ .Release.Name }}-scheduler - image: {{.Values.basic.scheduler_image_name}}:{{.Values.basic.image_tag_version}} - args: - - --alsologtostderr - - 
--scheduler-conf=/volcano.scheduler/{{.Values.basic.scheduler_config_file}} - - -v=3 - - 2>&1 - imagePullPolicy: "IfNotPresent" - volumeMounts: - - name: scheduler-config - mountPath: /volcano.scheduler - volumes: - - name: scheduler-config - configMap: - name: {{ .Release.Name }}-scheduler-configmap diff --git a/installer/chart/templates/scheduling_v1alpha1_podgroup.yaml b/installer/chart/templates/scheduling_v1alpha1_podgroup.yaml deleted file mode 100644 index 35f5e5bd1d..0000000000 --- a/installer/chart/templates/scheduling_v1alpha1_podgroup.yaml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: podgroups.scheduling.incubator.k8s.io - annotations: - "helm.sh/hook": crd-install -spec: - group: scheduling.incubator.k8s.io - names: - kind: PodGroup - plural: podgroups - scope: Namespaced - validation: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - properties: - minMember: - format: int32 - type: integer - type: object - status: - properties: - succeeded: - format: int32 - type: integer - failed: - format: int32 - type: integer - running: - format: int32 - type: integer - type: object - type: object - version: v1alpha1 diff --git a/installer/chart/templates/scheduling_v1alpha1_queue.yaml b/installer/chart/templates/scheduling_v1alpha1_queue.yaml deleted file mode 100644 index 46bc0ade69..0000000000 --- a/installer/chart/templates/scheduling_v1alpha1_queue.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: queues.scheduling.incubator.k8s.io - annotations: - "helm.sh/hook": crd-install -spec: - group: scheduling.incubator.k8s.io - names: - kind: Queue - plural: queues - scope: Cluster - validation: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - properties: - weight: - format: int32 - type: integer - type: object - type: object - version: v1alpha1 - subresources: - status: {} -status: - acceptedNames: - kind: "" - plural: "" - conditions: [] - storedVersions: [] diff --git a/installer/chart/values.yaml b/installer/chart/values.yaml deleted file mode 100644 index 813da3ba24..0000000000 --- a/installer/chart/values.yaml +++ /dev/null @@ -1,8 +0,0 @@ -basic: - image_tag_version: "latest" - controller_image_name: "volcanosh/vk-controllers" - scheduler_image_name: "volcanosh/vk-kube-batch" - admission_image_name: "volcanosh/vk-admission" - admission_secret_name: "volcano-admission-secret" - scheduler_config_file: "kube-batch.conf" - image_pull_secret: "" diff --git a/installer/helm b/installer/helm new file mode 160000 index 0000000000..4de7e23fad --- /dev/null +++ b/installer/helm @@ -0,0 +1 @@ +Subproject commit 4de7e23fadcea5a7a727eabc6c498711f2e63770 From dcf8043b4c8e55b36315cfa81b0607099d45d574 Mon Sep 17 00:00:00 2001 From: TommyLike Date: Fri, 21 Jun 2019 10:10:32 +0800 Subject: [PATCH 09/26] Make first job keep running --- test/e2e/job_scheduling.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/e2e/job_scheduling.go b/test/e2e/job_scheduling.go index 6f1cf0b07b..23057ab392 100644 --- a/test/e2e/job_scheduling.go +++ b/test/e2e/job_scheduling.go @@ -261,11 +261,10 @@ var _ = Describe("Job E2E Test", func() { namespace: "test", tasks: []taskSpec{ { - img: defaultNginxImage, - req: slot, - min: rep, - rep: rep, - command: "sleep 10s", + img: defaultNginxImage, + 
req: slot, + min: rep, + rep: rep, }, }, } From 9a8392bf81bfc6cf99b917ff0e9231f8d97d1d18 Mon Sep 17 00:00:00 2001 From: Thandayuthapani Date: Fri, 21 Jun 2019 10:33:40 +0530 Subject: [PATCH 10/26] Add UT cases for pkg/controllers/job/state package --- pkg/controllers/job/job_state_test.go | 1568 +++++++++++++++++++++++++ 1 file changed, 1568 insertions(+) create mode 100644 pkg/controllers/job/job_state_test.go diff --git a/pkg/controllers/job/job_state_test.go b/pkg/controllers/job/job_state_test.go new file mode 100644 index 0000000000..a45c5432e0 --- /dev/null +++ b/pkg/controllers/job/job_state_test.go @@ -0,0 +1,1568 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package job + +import ( + "fmt" + "k8s.io/api/core/v1" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "volcano.sh/volcano/pkg/apis/batch/v1alpha1" + "volcano.sh/volcano/pkg/controllers/apis" + "volcano.sh/volcano/pkg/controllers/job/state" +) + +func TestAbortedState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "AbortedState-ResumeAction case", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Aborted, + }, + }, + }, + }, + Action: v1alpha1.ResumeJobAction, + ExpectedVal: nil, + }, + { + Name: "AbortedState-AnyOtherAction case", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Aborted, + }, + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + } + + for _, testcase := range testcases { + absState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = absState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + if testcase.Action == v1alpha1.ResumeJobAction { + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if jobInfo.Job.Status.State.Phase != v1alpha1.Restarting { + t.Error("Expected Phase to be equal to restarting phase") + } + } + } +} + +func TestAbortingState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action 
v1alpha1.Action + ExpectedVal error + }{ + { + Name: "AbortedState-ResumeAction case", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Aborting, + }, + }, + }, + }, + Action: v1alpha1.ResumeJobAction, + ExpectedVal: nil, + }, + { + Name: "AbortedState-AnyOtherAction case with pods count equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Aborting, + }, + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "AbortedState-AnyOtherAction case with Pods count not equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Pending: 1, + State: v1alpha1.JobState{ + Phase: v1alpha1.Aborting, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodPending, nil), + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + } + + for _, testcase := range testcases { + absState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = absState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + if testcase.Action == v1alpha1.ResumeJobAction { + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if jobInfo.Job.Status.RetryCount == 0 { + t.Error("Retry Count should not be zero") + } + } + + if testcase.Action != v1alpha1.ResumeJobAction { + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if testcase.JobInfo.Job.Status.Pending == 0 && testcase.JobInfo.Job.Status.Running == 0 && testcase.JobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Aborted { + t.Error("Phase Should be aborted") + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Aborting { + t.Error("Phase Should be aborted") + } + } + } + } +} + +func TestCompletingState_Execute(t *testing.T) { + + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "CompletingState- With pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Running: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Completing, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": 
buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.ResumeJobAction, + ExpectedVal: nil, + }, + { + Name: "CompletingState- With pod count equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completing, + }, + }, + }, + }, + Action: v1alpha1.ResumeJobAction, + ExpectedVal: nil, + }, + } + + for i, testcase := range testcases { + testState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = testState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if testcase.JobInfo.Job.Status.Running == 0 && testcase.JobInfo.Job.Status.Pending == 0 && testcase.JobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completed { + fmt.Println(jobInfo.Job.Status.State.Phase) + t.Errorf("Expected Phase to be Completed State in test case: %d", i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completing { + t.Errorf("Expected Phase to be completing state in test case: %d", i) + } + } + } +} + +func TestFinishedState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "FinishedState Test Case", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + }, + Action: v1alpha1.ResumeJobAction, + ExpectedVal: nil, + }, + } + + for _, testcase := range testcases { + testState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = testState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + } +} + +func TestInqueueState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "InqueueState- RestartJobAction case With terminating pod count equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, 
+ }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "InqueueState- RestartJobAction case With terminating pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "InqueueState- AbortJobAction case With terminating pod count equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, + }, + }, + Action: v1alpha1.AbortJobAction, + ExpectedVal: nil, + }, + { + Name: "InqueueState- AbortJobAction case With terminating pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.AbortJobAction, + ExpectedVal: nil, + }, + { + Name: "InqueueState- CompleteJobAction case With terminating pod count equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, + }, + }, + Action: v1alpha1.CompleteJobAction, + ExpectedVal: nil, + }, + { + Name: "InqueueState- CompleteJobAction case With terminating pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.CompleteJobAction, + ExpectedVal: nil, + }, + { + Name: "InqueueState- Default case With Min Available equal to running pods", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 3, + }, + Status: v1alpha1.JobStatus{ + Running: 3, + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + "pod3": buildPod(namespace, "pod3", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.ResumeJobAction, + ExpectedVal: nil, + }, + { + Name: 
"InqueueState- Default case With Min Available not equal to running pods", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 3, + }, + Status: v1alpha1.JobStatus{ + Running: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Inqueue, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.ResumeJobAction, + ExpectedVal: nil, + }, + } + + for i, testcase := range testcases { + testState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = testState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if testcase.Action == v1alpha1.RestartJobAction { + if jobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Pending { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Pending, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Restarting { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Restarting, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.AbortJobAction { + if jobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Pending { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Pending, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Aborting { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Restarting, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.CompleteJobAction { + if jobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completed { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Completed, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completing { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Restarting, jobInfo.Job.Status.State.Phase, i) + } + } + } else { + if jobInfo.Job.Spec.MinAvailable <= jobInfo.Job.Status.Running+jobInfo.Job.Status.Succeeded+jobInfo.Job.Status.Failed { + if jobInfo.Job.Status.State.Phase != v1alpha1.Running { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Running, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != testcase.JobInfo.Job.Status.State.Phase { + t.Errorf("Expected Job phase to %s, but got %s in case %d", testcase.JobInfo.Job.Status.State.Phase, jobInfo.Job.Status.State.Phase, i) + } + } + } + } +} + +func TestPendingState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo 
*apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "PendingState- RestartJobAction case With terminating pod count equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- RestartJobAction case With terminating pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- AbortJobAction case With terminating pod count equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + }, + Action: v1alpha1.AbortJobAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- AbortJobAction case With terminating pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.AbortJobAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- CompleteJobAction case With terminating pod count equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + }, + Action: v1alpha1.CompleteJobAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- CompleteJobAction case With terminating pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.CompleteJobAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- EnqueueAction case With Min Available equal to running pods", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 3, + }, + 
Status: v1alpha1.JobStatus{ + Running: 3, + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + "pod3": buildPod(namespace, "pod3", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.EnqueueAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- EnqueueAction case With Min Available not equal to running pods", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 3, + }, + Status: v1alpha1.JobStatus{ + Running: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.EnqueueAction, + ExpectedVal: nil, + }, + { + Name: "PendingState- Default case With Min Available equal to running pods", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 3, + }, + Status: v1alpha1.JobStatus{ + Running: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Pending, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.SyncJobAction, + ExpectedVal: nil, + }, + } + + for i, testcase := range testcases { + testState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = testState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if testcase.Action == v1alpha1.RestartJobAction { + if jobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Pending { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Pending, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Restarting { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Restarting, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.AbortJobAction { + if jobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Pending { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Pending, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Aborting { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Restarting, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.CompleteJobAction { + if 
jobInfo.Job.Status.Terminating == 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completed { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Completed, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completing { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Restarting, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.EnqueueAction { + if jobInfo.Job.Spec.MinAvailable <= jobInfo.Job.Status.Running+jobInfo.Job.Status.Succeeded+jobInfo.Job.Status.Failed { + if jobInfo.Job.Status.State.Phase != v1alpha1.Running { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Running, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Inqueue { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Inqueue, jobInfo.Job.Status.State.Phase, i) + } + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Pending { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Pending, jobInfo.Job.Status.State.Phase, i) + } + } + } +} + +func TestRestartingState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "RestartingState- RetryCount is equal to or greater than MaxRetry", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MaxRetry: 3, + }, + Status: v1alpha1.JobStatus{ + RetryCount: 3, + State: v1alpha1.JobState{ + Phase: v1alpha1.Restarting, + }, + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "RestartingState- RetryCount is less than MaxRetry", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MaxRetry: 3, + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task1", + }, + { + Name: "task2", + }, + }, + }, + Status: v1alpha1.JobStatus{ + RetryCount: 1, + MinAvailable: 1, + Terminating: 0, + State: v1alpha1.JobState{ + Phase: v1alpha1.Restarting, + }, + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + } + + for i, testcase := range testcases { + testState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = testState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if testcase.JobInfo.Job.Spec.MaxRetry <= testcase.JobInfo.Job.Status.RetryCount { + if jobInfo.Job.Status.State.Phase != v1alpha1.Failed { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Failed, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Pending { + 
t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Pending, jobInfo.Job.Status.State.Phase, i) + } + } + } +} + +func TestRunningState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "RunningState- RestartJobAction case and Terminating Pods not equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- RestartJobAction case and Terminating Pods equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 0, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + Action: v1alpha1.RestartJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- AbortAction case and Terminating Pods equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 0, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + Action: v1alpha1.AbortJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- AbortAction case and Terminating Pods not equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.AbortJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- TerminateJobAction case and Terminating Pods equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 0, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + Action: v1alpha1.TerminateJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- TerminateJobAction case and Terminating Pods not equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, 
"pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.TerminateJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- CompleteJobAction case and Terminating Pods equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 0, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + Action: v1alpha1.CompleteJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- CompleteJobAction case and Terminating Pods not equal to 0", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{}, + Status: v1alpha1.JobStatus{ + Terminating: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.CompleteJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- Default case and Total is equal to failed+succeeded", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task1", + Replicas: 2, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: "task1", + }, + }, + }, + }, + }, + Status: v1alpha1.JobStatus{ + Succeeded: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "job1-task1-0": buildPod(namespace, "pod1", v1.PodSucceeded, nil), + "job1-task1-1": buildPod(namespace, "pod2", v1.PodSucceeded, nil), + }, + }, + }, + Action: v1alpha1.SyncJobAction, + ExpectedVal: nil, + }, + { + Name: "RunningState- Default case and Total is not equal to failed+succeeded", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task1", + Replicas: 2, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: "task1", + }, + }, + }, + }, + }, + Status: v1alpha1.JobStatus{ + Succeeded: 1, + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "job1-task1-0": buildPod(namespace, "pod1", v1.PodSucceeded, nil), + }, + }, + }, + Action: v1alpha1.SyncJobAction, + ExpectedVal: nil, + }, + } + + for i, testcase := range testcases { + testState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = testState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + 
t.Error("Error while retrieving value from Cache") + } + + if testcase.Action == v1alpha1.RestartJobAction { + if testcase.JobInfo.Job.Status.Terminating != 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Restarting { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Restarting, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Running { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Running, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.AbortJobAction { + if testcase.JobInfo.Job.Status.Terminating != 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Aborting { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Aborting, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Running { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Running, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.TerminateJobAction { + if testcase.JobInfo.Job.Status.Terminating != 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Terminating { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Terminating, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Running { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Running, jobInfo.Job.Status.State.Phase, i) + } + } + } else if testcase.Action == v1alpha1.CompleteJobAction { + if testcase.JobInfo.Job.Status.Terminating != 0 { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completing { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Completing, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completed { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Completed, jobInfo.Job.Status.State.Phase, i) + } + } + } else { + total := state.TotalTasks(testcase.JobInfo.Job) + if total == testcase.JobInfo.Job.Status.Succeeded+testcase.JobInfo.Job.Status.Failed { + if jobInfo.Job.Status.State.Phase != v1alpha1.Completed { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Completed, jobInfo.Job.Status.State.Phase, i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Running { + t.Errorf("Expected Job phase to %s, but got %s in case %d", v1alpha1.Running, jobInfo.Job.Status.State.Phase, i) + } + } + } + } +} + +func TestTerminatingState_Execute(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobInfo *apis.JobInfo + Action v1alpha1.Action + ExpectedVal error + }{ + { + Name: "TerminatingState- With pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + Running: 2, + State: v1alpha1.JobState{ + Phase: v1alpha1.Terminating, + }, + }, + }, + Pods: map[string]map[string]*v1.Pod{ + "task1": { + "pod1": buildPod(namespace, "pod1", v1.PodRunning, nil), + "pod2": buildPod(namespace, "pod2", v1.PodRunning, nil), + }, + }, + }, + Action: v1alpha1.TerminateJobAction, + ExpectedVal: nil, + }, + { + Name: "TerminatingState- With pod count not equal to zero", + JobInfo: &apis.JobInfo{ + Namespace: namespace, + Name: "jobinfo1", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "Job1", + Namespace: 
namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Terminating, + }, + }, + }, + }, + Action: v1alpha1.TerminateJobAction, + ExpectedVal: nil, + }, + } + + for i, testcase := range testcases { + testState := state.NewState(testcase.JobInfo) + + fakecontroller := newFakeController() + state.KillJob = fakecontroller.killJob + + _, err := fakecontroller.vkClients.BatchV1alpha1().Jobs(namespace).Create(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while creating Job") + } + + err = fakecontroller.cache.Add(testcase.JobInfo.Job) + if err != nil { + t.Error("Error while adding Job in cache") + } + + err = testState.Execute(testcase.Action) + if err != nil { + t.Errorf("Expected Error not to occur but got: %s", err) + } + + jobInfo, err := fakecontroller.cache.Get(fmt.Sprintf("%s/%s", testcase.JobInfo.Job.Namespace, testcase.JobInfo.Job.Name)) + if err != nil { + t.Error("Error while retrieving value from Cache") + } + + if testcase.JobInfo.Job.Status.Running == 0 && testcase.JobInfo.Job.Status.Pending == 0 && testcase.JobInfo.Job.Status.Terminating == 0 { + + if jobInfo.Job.Status.State.Phase != v1alpha1.Terminated { + fmt.Println(jobInfo.Job.Status.State.Phase) + t.Errorf("Expected Phase to be Terminated State in test case: %d", i) + } + } else { + if jobInfo.Job.Status.State.Phase != v1alpha1.Terminating { + t.Errorf("Expected Phase to be Terminating state in test case: %d", i) + } + } + } +} From 9a8fe35e613f8cad414cb37db4d49e86d7499bfa Mon Sep 17 00:00:00 2001 From: Thandayuthapani Date: Mon, 24 Jun 2019 09:43:36 +0530 Subject: [PATCH 11/26] UT cases for garbagecollector package --- .../garbagecollector/garbagecollector_test.go | 381 ++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 pkg/controllers/garbagecollector/garbagecollector_test.go diff --git a/pkg/controllers/garbagecollector/garbagecollector_test.go b/pkg/controllers/garbagecollector/garbagecollector_test.go new file mode 100644 index 0000000000..f431f7c560 --- /dev/null +++ b/pkg/controllers/garbagecollector/garbagecollector_test.go @@ -0,0 +1,381 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package garbagecollector + +import ( + "fmt" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "volcano.sh/volcano/pkg/apis/batch/v1alpha1" + volcanoclient "volcano.sh/volcano/pkg/client/clientset/versioned/fake" +) + +func TestGarbageCollector_ProcessJob(t *testing.T) { + +} + +func TestGarbageCollector_ProcessTTL(t *testing.T) { + namespace := "test" + var ttlSecond int32 = 3 + var ttlSecondZero int32 + testcases := []struct { + Name string + Job *v1alpha1.Job + ExpectedVal bool + ExpectedErr error + }{ + { + Name: "False Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecond, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + LastTransitionTime: metav1.NewTime(time.Now()), + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedVal: false, + ExpectedErr: nil, + }, + { + Name: "True Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecondZero, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + LastTransitionTime: metav1.NewTime(time.Now()), + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedVal: true, + ExpectedErr: nil, + }, + } + for i, testcase := range testcases { + gc := New(volcanoclient.NewSimpleClientset()) + + expired, err := gc.processTTL(testcase.Job) + if err != nil { + t.Error("Did not expect error") + } + if expired != testcase.ExpectedVal { + t.Errorf("Expected Return Value to be %t, but got %t in case %d", testcase.ExpectedVal, expired, i) + } + } +} + +func TestGarbageCollector_NeedsCleanup(t *testing.T) { + namespace := "test" + + var ttlSecond int32 = 3 + + testcases := []struct { + Name string + Job *v1alpha1.Job + ExpectedVal bool + }{ + { + Name: "Success Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecond, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedVal: true, + }, + { + Name: "Failure Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecond, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + ExpectedVal: false, + }, + } + + for i, testcase := range testcases { + finished := needsCleanup(testcase.Job) + if finished != testcase.ExpectedVal { + t.Errorf("Expected value to be %t, but got: %t in case %d", testcase.ExpectedVal, finished, i) + } + } +} + +func TestGarbageCollector_IsJobFinished(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + ExpectedVal bool + }{ + { + Name: "Success Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedVal: true, + }, + { + Name: "Success Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + ExpectedVal: false, + }, + } + + for i, testcase := range testcases { + finished := isJobFinished(testcase.Job) + if finished != 
testcase.ExpectedVal { + t.Errorf("Expected value to be %t, but got: %t in case %d", testcase.ExpectedVal, finished, i) + } + } +} + +func TestGarbageCollector_GetFinishAndExpireTime(t *testing.T) { + namespace := "test" + + var ttlSecond int32 = 3 + var ttlSecondFail int32 = 2 + + testTime := time.Date(1, 1, 1, 1, 1, 1, 0, time.UTC) + + testcases := []struct { + Name string + Job *v1alpha1.Job + ExpectedErr error + }{ + { + Name: "Success case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecond, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + LastTransitionTime: metav1.NewTime(testTime), + }, + }, + }, + ExpectedErr: nil, + }, + { + Name: "Failure case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecondFail, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + LastTransitionTime: metav1.NewTime(testTime), + }, + }, + }, + ExpectedErr: nil, + }, + } + + for i, testcase := range testcases { + finishTime, expireTime, err := getFinishAndExpireTime(testcase.Job) + if err != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected Error to be: %s but got: %s in case %d", testcase.ExpectedErr, err, i) + } + + if finishTime != nil && metav1.NewTime(*finishTime) != testcase.Job.Status.State.LastTransitionTime { + t.Errorf("Expected value to be: %v, but got: %v in case %d", testcase.Job.Status.State.LastTransitionTime, metav1.NewTime(*finishTime), i) + } + + if expireTime != nil && metav1.NewTime(*expireTime) != metav1.NewTime(testcase.Job.Status.State.LastTransitionTime.Add(time.Duration(*testcase.Job.Spec.TTLSecondsAfterFinished)*time.Second)) { + t.Errorf("Expected value to be: %v, but got: %v in case %d", testcase.Job.Status.State.LastTransitionTime.Add(time.Duration(*testcase.Job.Spec.TTLSecondsAfterFinished)*time.Second), metav1.NewTime(*expireTime), i) + } + } +} + +func TestGarbageCollector_TimeLeft(t *testing.T) { + namespace := "test" + + var ttlSecond int32 = 3 + + testTime := time.Date(1, 1, 1, 1, 1, 1, 0, time.UTC) + + testcases := []struct { + Name string + Job *v1alpha1.Job + Time *time.Time + ExpectedVal time.Duration + ExpectedErr error + }{ + { + Name: "Success Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecond, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + LastTransitionTime: metav1.NewTime(testTime), + }, + }, + }, + Time: &testTime, + ExpectedVal: time.Duration(3), + ExpectedErr: nil, + }, + { + Name: "Failure Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + TTLSecondsAfterFinished: &ttlSecond, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + LastTransitionTime: metav1.NewTime(testTime), + }, + }, + }, + Time: &testTime, + ExpectedVal: time.Duration(3), + ExpectedErr: fmt.Errorf("job %s/%s should not be cleaned up", "test", "job1"), + }, + } + + for i, testcase := range testcases { + timeDuration, err := timeLeft(testcase.Job, testcase.Time) + if err != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected Error to be: %s but got: %s in case %d", testcase.ExpectedErr, 
err, i) + } + + if timeDuration != nil && timeDuration.Seconds() != float64(testcase.ExpectedVal*time.Second)/1e9 { + t.Errorf("Expected Value to be: %v but got: %f in case %d", testcase.ExpectedVal, timeDuration.Seconds(), i) + } + } +} + +func TestGarbageCollector_JobFinishTime(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + ExpectedVal error + }{ + { + Name: "Success Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + LastTransitionTime: metav1.NewTime(time.Now()), + }, + }, + }, + ExpectedVal: nil, + }, + { + Name: "Failure Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + ExpectedVal: fmt.Errorf("unable to find the time when the Job %s/%s finished", "test", "job1"), + }, + } + + for i, testcase := range testcases { + _, err := jobFinishTime(testcase.Job) + if err != nil && err.Error() != testcase.ExpectedVal.Error() { + t.Errorf("Expected Error to be: %s but got: %s in case %d", testcase.ExpectedVal, err, i) + } + } +} From 3671c8968f66dcf312200bca28865cf60402ffc9 Mon Sep 17 00:00:00 2001 From: nikita15p Date: Mon, 24 Jun 2019 10:06:08 +0530 Subject: [PATCH 12/26] Lint fix for apis, cache, job and state package of controller --- hack/.golint_failures | 4 -- pkg/controllers/apis/job_info.go | 9 +++ pkg/controllers/cache/cache.go | 20 ++++--- pkg/controllers/cache/interface.go | 1 + pkg/controllers/job/job_controller_actions.go | 60 +++++++++---------- .../job/job_controller_actions_test.go | 2 +- pkg/controllers/job/job_controller_plugins.go | 45 +++++++------- pkg/controllers/job/job_controller_util.go | 3 + pkg/controllers/job/state/aborting.go | 8 +-- pkg/controllers/job/state/completing.go | 6 +- pkg/controllers/job/state/factory.go | 12 ++++ pkg/controllers/job/state/restarting.go | 19 +++--- pkg/controllers/job/state/terminating.go | 6 +- pkg/controllers/job/state/util.go | 3 +- 14 files changed, 112 insertions(+), 86 deletions(-) diff --git a/hack/.golint_failures b/hack/.golint_failures index 7ea2ff67b7..d258c0b3f6 100644 --- a/hack/.golint_failures +++ b/hack/.golint_failures @@ -1,6 +1,2 @@ volcano.sh/volcano/pkg/admission -volcano.sh/volcano/pkg/controllers/apis -volcano.sh/volcano/pkg/controllers/cache -volcano.sh/volcano/pkg/controllers/job -volcano.sh/volcano/pkg/controllers/job/state volcano.sh/volcano/test/e2e diff --git a/pkg/controllers/apis/job_info.go b/pkg/controllers/apis/job_info.go index 0517ecc0d5..a95a099b20 100644 --- a/pkg/controllers/apis/job_info.go +++ b/pkg/controllers/apis/job_info.go @@ -24,6 +24,7 @@ import ( "volcano.sh/volcano/pkg/apis/batch/v1alpha1" ) +//JobInfo struct type JobInfo struct { Namespace string Name string @@ -32,6 +33,7 @@ type JobInfo struct { Pods map[string]map[string]*v1.Pod } +//Clone function clones the k8s pod values to the JobInfo struct func (ji *JobInfo) Clone() *JobInfo { job := &JobInfo{ Namespace: ji.Namespace, @@ -51,12 +53,15 @@ func (ji *JobInfo) Clone() *JobInfo { return job } +//SetJob sets the volcano jobs values to the JobInfo struct func (ji *JobInfo) SetJob(job *v1alpha1.Job) { ji.Name = job.Name ji.Namespace = job.Namespace ji.Job = job } +//AddPod adds the k8s pod object values to the Pods field +//of JobStruct if it doesn't exist. 
Otherwise it throws error func (ji *JobInfo) AddPod(pod *v1.Pod) error { taskName, found := pod.Annotations[v1alpha1.TaskSpecKey] if !found { @@ -81,6 +86,7 @@ func (ji *JobInfo) AddPod(pod *v1.Pod) error { return nil } +//UpdatePod updates the k8s pod object values to the existing pod func (ji *JobInfo) UpdatePod(pod *v1.Pod) error { taskName, found := pod.Annotations[v1alpha1.TaskSpecKey] if !found { @@ -105,6 +111,7 @@ func (ji *JobInfo) UpdatePod(pod *v1.Pod) error { return nil } +//DeletePod deletes the given k8s pod from the JobInfo struct func (ji *JobInfo) DeletePod(pod *v1.Pod) error { taskName, found := pod.Annotations[v1alpha1.TaskSpecKey] if !found { @@ -127,6 +134,7 @@ func (ji *JobInfo) DeletePod(pod *v1.Pod) error { return nil } +//Request struct type Request struct { Namespace string JobName string @@ -138,6 +146,7 @@ type Request struct { JobVersion int32 } +//String function returns the request in string format func (r Request) String() string { return fmt.Sprintf( "Job: %s/%s, Task:%s, Event:%s, ExitCode:%d, Action:%s, JobVersion: %d", diff --git a/pkg/controllers/cache/cache.go b/pkg/controllers/cache/cache.go index de3747dd48..9684330684 100644 --- a/pkg/controllers/cache/cache.go +++ b/pkg/controllers/cache/cache.go @@ -40,14 +40,17 @@ func keyFn(ns, name string) string { return fmt.Sprintf("%s/%s", ns, name) } +//JobKeyByName gets the key for the job name func JobKeyByName(namespace string, name string) string { return keyFn(namespace, name) } +//JobKeyByReq gets the key for the job request func JobKeyByReq(req *apis.Request) string { return keyFn(req.Namespace, req.JobName) } +//JobKey gets the "ns"/"name" format of the given job func JobKey(job *v1alpha1.Job) string { return keyFn(job.Namespace, job.Name) } @@ -66,6 +69,7 @@ func jobKeyOfPod(pod *v1.Pod) (string, error) { return keyFn(pod.Namespace, jobName), nil } +//New gets the job Cache func New() Cache { return &jobCache{ jobs: map[string]*apis.JobInfo{}, @@ -133,11 +137,11 @@ func (jc *jobCache) Update(obj *v1alpha1.Job) error { defer jc.Unlock() key := JobKey(obj) - if job, found := jc.jobs[key]; !found { + job, found := jc.jobs[key] + if !found { return fmt.Errorf("failed to find job <%v>", key) - } else { - job.Job = obj } + job.Job = obj return nil } @@ -147,12 +151,12 @@ func (jc *jobCache) Delete(obj *v1alpha1.Job) error { defer jc.Unlock() key := JobKey(obj) - if jobInfo, found := jc.jobs[key]; !found { + jobInfo, found := jc.jobs[key] + if !found { return fmt.Errorf("failed to find job <%v>", key) - } else { - jobInfo.Job = nil - jc.deleteJob(jobInfo) } + jobInfo.Job = nil + jc.deleteJob(jobInfo) return nil } @@ -261,7 +265,7 @@ func (jc *jobCache) TaskCompleted(jobKey, taskName string) bool { for _, pod := range taskPods { if pod.Status.Phase == v1.PodSucceeded { - completed += 1 + completed++ } } return completed >= taskReplicas diff --git a/pkg/controllers/cache/interface.go b/pkg/controllers/cache/interface.go index a244565428..6e7650388d 100644 --- a/pkg/controllers/cache/interface.go +++ b/pkg/controllers/cache/interface.go @@ -23,6 +23,7 @@ import ( "volcano.sh/volcano/pkg/controllers/apis" ) +//Cache Interface type Cache interface { Run(stopCh <-chan struct{}) diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index 8d2bc0200a..9aa1fbac6b 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -116,16 +116,16 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase 
state.PhaseM } // Update Job status - if job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job); err != nil { + job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job) + if err != nil { glog.Errorf("Failed to update status of Job %v/%v: %v", job.Namespace, job.Name, err) return err - } else { - if e := cc.cache.Update(job); e != nil { - glog.Errorf("KillJob - Failed to update Job %v/%v in cache: %v", - job.Namespace, job.Name, e) - return e - } + } + if e := cc.cache.Update(job); e != nil { + glog.Errorf("KillJob - Failed to update Job %v/%v in cache: %v", + job.Namespace, job.Name, e) + return e } // Delete PodGroup @@ -165,7 +165,7 @@ func (cc *Controller) createJob(jobInfo *apis.JobInfo, updateStatus state.Update return err } - err, job := cc.createJobIOIfNotExist(job) + job, err := cc.createJobIOIfNotExist(job) if err != nil { cc.recorder.Event(job, v1.EventTypeWarning, string(vkv1.PVCError), fmt.Sprintf("Failed to create PVC, err: %v", err)) @@ -178,16 +178,16 @@ func (cc *Controller) createJob(jobInfo *apis.JobInfo, updateStatus state.Update } } - if job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job); err != nil { + job, err = cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job) + if err != nil { glog.Errorf("Failed to update status of Job %v/%v: %v", job.Namespace, job.Name, err) return err - } else { - if err := cc.cache.Update(job); err != nil { - glog.Errorf("CreateJob - Failed to update Job %v/%v in cache: %v", - job.Namespace, job.Name, err) - return err - } + } + if err = cc.cache.Update(job); err != nil { + glog.Errorf("CreateJob - Failed to update Job %v/%v in cache: %v", + job.Namespace, job.Name, err) + return err } return nil @@ -330,23 +330,22 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt job.Status.State.LastTransitionTime = metav1.Now() } } - - if job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job); err != nil { + job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job) + if err != nil { glog.Errorf("Failed to update status of Job %v/%v: %v", job.Namespace, job.Name, err) return err - } else { - if e := cc.cache.Update(job); e != nil { - glog.Errorf("SyncJob - Failed to update Job %v/%v in cache: %v", - job.Namespace, job.Name, e) - return e - } + } + if e := cc.cache.Update(job); e != nil { + glog.Errorf("SyncJob - Failed to update Job %v/%v in cache: %v", + job.Namespace, job.Name, e) + return e } return nil } -func (cc *Controller) createJobIOIfNotExist(job *vkv1.Job) (error, *vkv1.Job) { +func (cc *Controller) createJobIOIfNotExist(job *vkv1.Job) (*vkv1.Job, error) { // If PVC does not exist, create them for Job. 
var needUpdate, nameExist bool volumes := job.Spec.Volumes @@ -359,7 +358,7 @@ func (cc *Controller) createJobIOIfNotExist(job *vkv1.Job) (error, *vkv1.Job) { vcName = vkjobhelpers.MakeVolumeClaimName(job.Name) exist, err := cc.checkPVCExist(job, vcName) if err != nil { - return err, nil + return nil, err } if exist { continue @@ -371,7 +370,7 @@ func (cc *Controller) createJobIOIfNotExist(job *vkv1.Job) (error, *vkv1.Job) { } else { exist, err := cc.checkPVCExist(job, vcName) if err != nil { - return err, nil + return nil, err } nameExist = exist } @@ -382,7 +381,7 @@ func (cc *Controller) createJobIOIfNotExist(job *vkv1.Job) (error, *vkv1.Job) { } if volume.VolumeClaim != nil { if err := cc.createPVC(job, vcName, volume.VolumeClaim); err != nil { - return err, nil + return nil, err } job.Status.ControlledResources["volume-pvc-"+vcName] = vcName } else { @@ -395,12 +394,11 @@ func (cc *Controller) createJobIOIfNotExist(job *vkv1.Job) (error, *vkv1.Job) { if err != nil { glog.Errorf("Failed to update Job %v/%v for volume claim name: %v ", job.Namespace, job.Name, err) - return err, nil - } else { - return nil, newJob + return nil, err } + return newJob, err } - return nil, job + return job, nil } func (cc *Controller) checkPVCExist(job *vkv1.Job, vcName string) (bool, error) { diff --git a/pkg/controllers/job/job_controller_actions_test.go b/pkg/controllers/job/job_controller_actions_test.go index 714c43c14b..34fff91d48 100644 --- a/pkg/controllers/job/job_controller_actions_test.go +++ b/pkg/controllers/job/job_controller_actions_test.go @@ -409,7 +409,7 @@ func TestCreateJobIOIfNotExistFunc(t *testing.T) { for i, testcase := range testcases { fakeController := newFakeController() - err, job := fakeController.createJobIOIfNotExist(testcase.Job) + job, err := fakeController.createJobIOIfNotExist(testcase.Job) if err != testcase.ExpextVal { t.Errorf("Expected Return value to be : %s, but got: %s in testcase %d", testcase.ExpextVal, err, i) } diff --git a/pkg/controllers/job/job_controller_plugins.go b/pkg/controllers/job/job_controller_plugins.go index d5265a2bcf..ab7dccccf1 100644 --- a/pkg/controllers/job/job_controller_plugins.go +++ b/pkg/controllers/job/job_controller_plugins.go @@ -31,17 +31,18 @@ import ( func (cc *Controller) pluginOnPodCreate(job *vkv1.Job, pod *v1.Pod) error { client := vkinterface.PluginClientset{KubeClients: cc.kubeClients} for name, args := range job.Spec.Plugins { - if pb, found := vkplugin.GetPluginBuilder(name); !found { + pb, found := vkplugin.GetPluginBuilder(name) + if !found { err := fmt.Errorf("failed to get plugin %s", name) glog.Error(err) return err - } else { - glog.Infof("Starting to execute plugin at : %s on job: <%s/%s>", name, job.Namespace, job.Name) - if err := pb(client, args).OnPodCreate(pod, job); err != nil { - glog.Errorf("Failed to process on pod create plugin %s, err %v.", name, err) - return err - } } + glog.Infof("Starting to execute plugin at : %s on job: <%s/%s>", name, job.Namespace, job.Name) + if err := pb(client, args).OnPodCreate(pod, job); err != nil { + glog.Errorf("Failed to process on pod create plugin %s, err %v.", name, err) + return err + } + } return nil } @@ -52,17 +53,18 @@ func (cc *Controller) pluginOnJobAdd(job *vkv1.Job) error { job.Status.ControlledResources = make(map[string]string) } for name, args := range job.Spec.Plugins { - if pb, found := vkplugin.GetPluginBuilder(name); !found { + pb, found := vkplugin.GetPluginBuilder(name) + if !found { err := fmt.Errorf("failed to get plugin %s", name) glog.Error(err) 
return err - } else { - glog.Infof("Starting to execute plugin at : %s on job: <%s/%s>", name, job.Namespace, job.Name) - if err := pb(client, args).OnJobAdd(job); err != nil { - glog.Errorf("Failed to process on job add plugin %s, err %v.", name, err) - return err - } } + glog.Infof("Starting to execute plugin at : %s on job: <%s/%s>", name, job.Namespace, job.Name) + if err := pb(client, args).OnJobAdd(job); err != nil { + glog.Errorf("Failed to process on job add plugin %s, err %v.", name, err) + return err + } + } return nil @@ -71,17 +73,18 @@ func (cc *Controller) pluginOnJobAdd(job *vkv1.Job) error { func (cc *Controller) pluginOnJobDelete(job *vkv1.Job) error { client := vkinterface.PluginClientset{KubeClients: cc.kubeClients} for name, args := range job.Spec.Plugins { - if pb, found := vkplugin.GetPluginBuilder(name); !found { + pb, found := vkplugin.GetPluginBuilder(name) + if !found { err := fmt.Errorf("failed to get plugin %s", name) glog.Error(err) return err - } else { - glog.Infof("Starting to execute plugin at : %s on job: <%s/%s>", name, job.Namespace, job.Name) - if err := pb(client, args).OnJobDelete(job); err != nil { - glog.Errorf("failed to process on job delete plugin %s, err %v.", name, err) - return err - } } + glog.Infof("Starting to execute plugin at : %s on job: <%s/%s>", name, job.Namespace, job.Name) + if err := pb(client, args).OnJobDelete(job); err != nil { + glog.Errorf("failed to process on job delete plugin %s, err %v.", name, err) + return err + } + } return nil diff --git a/pkg/controllers/job/job_controller_util.go b/pkg/controllers/job/job_controller_util.go index aaf8efeef2..8104d8f0f4 100644 --- a/pkg/controllers/job/job_controller_util.go +++ b/pkg/controllers/job/job_controller_util.go @@ -31,6 +31,7 @@ import ( vkjobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" ) +//MakePodName append podname,jobname,taskName and index and returns the string func MakePodName(jobName string, taskName string, index int) string { return fmt.Sprintf(vkjobhelpers.PodNameFmt, jobName, taskName, index) } @@ -186,12 +187,14 @@ func addResourceList(list, new v1.ResourceList) { } } +//TaskPriority structure type TaskPriority struct { priority int32 vkv1.TaskSpec } +//TasksPriority is a slice of TaskPriority type TasksPriority []TaskPriority func (p TasksPriority) Len() int { return len(p) } diff --git a/pkg/controllers/job/state/aborting.go b/pkg/controllers/job/state/aborting.go index ef40326507..fbf2b5e764 100644 --- a/pkg/controllers/job/state/aborting.go +++ b/pkg/controllers/job/state/aborting.go @@ -40,11 +40,11 @@ func (ps *abortingState) Execute(action vkv1.Action) error { // If any "alive" pods, still in Aborting phase if status.Terminating != 0 || status.Pending != 0 || status.Running != 0 { return false - } else { - status.State.Phase = vkv1.Aborted - status.State.LastTransitionTime = metav1.Now() - return true } + status.State.Phase = vkv1.Aborted + status.State.LastTransitionTime = metav1.Now() + return true + }) } } diff --git a/pkg/controllers/job/state/completing.go b/pkg/controllers/job/state/completing.go index acad88e88a..953df5aef7 100644 --- a/pkg/controllers/job/state/completing.go +++ b/pkg/controllers/job/state/completing.go @@ -30,9 +30,9 @@ func (ps *completingState) Execute(action vkv1.Action) error { // If any "alive" pods, still in Completing phase if status.Terminating != 0 || status.Pending != 0 || status.Running != 0 { return false - } else { - status.State.Phase = vkv1.Completed - return true } + status.State.Phase = 
vkv1.Completed + return true + }) } diff --git a/pkg/controllers/job/state/factory.go b/pkg/controllers/job/state/factory.go index e3e30a582d..aaffbaff81 100644 --- a/pkg/controllers/job/state/factory.go +++ b/pkg/controllers/job/state/factory.go @@ -23,12 +23,22 @@ import ( "volcano.sh/volcano/pkg/controllers/apis" ) +//PhaseMap to store the pod phases. type PhaseMap map[v1.PodPhase]struct{} + +//UpdateStatusFn updates the job status. type UpdateStatusFn func(status *vkv1.JobStatus) (jobPhaseChanged bool) + +//ActionFn will create or delete Pods according to Job's spec. type ActionFn func(job *apis.JobInfo, fn UpdateStatusFn) error + +//KillActionFn kill all Pods of Job with phase not in podRetainPhase. type KillActionFn func(job *apis.JobInfo, podRetainPhase PhaseMap, fn UpdateStatusFn) error +//PodRetainPhaseNone stores no phase var PodRetainPhaseNone = PhaseMap{} + +//PodRetainPhaseSoft stores PodSucceeded and PodFailed Phase var PodRetainPhaseSoft = PhaseMap{ v1.PodSucceeded: {}, v1.PodFailed: {}, @@ -43,11 +53,13 @@ var ( CreateJob ActionFn ) +//State interface type State interface { // Execute executes the actions based on current state. Execute(act vkv1.Action) error } +//NewState gets the state from the volcano job Phase func NewState(jobInfo *apis.JobInfo) State { job := jobInfo.Job switch job.Status.State.Phase { diff --git a/pkg/controllers/job/state/restarting.go b/pkg/controllers/job/state/restarting.go index df83503b83..d7c16a94e9 100644 --- a/pkg/controllers/job/state/restarting.go +++ b/pkg/controllers/job/state/restarting.go @@ -37,16 +37,15 @@ func (ps *restartingState) Execute(action vkv1.Action) error { // Failed is the phase that the job is restarted failed reached the maximum number of retries. status.State.Phase = vkv1.Failed return true - } else { - total := int32(0) - for _, task := range ps.job.Job.Spec.Tasks { - total += task.Replicas - } - - if total-status.Terminating >= status.MinAvailable { - status.State.Phase = vkv1.Pending - return true - } + } + total := int32(0) + for _, task := range ps.job.Job.Spec.Tasks { + total += task.Replicas + } + + if total-status.Terminating >= status.MinAvailable { + status.State.Phase = vkv1.Pending + return true } return false diff --git a/pkg/controllers/job/state/terminating.go b/pkg/controllers/job/state/terminating.go index b46c2429d6..f7e3bd98d4 100644 --- a/pkg/controllers/job/state/terminating.go +++ b/pkg/controllers/job/state/terminating.go @@ -30,9 +30,9 @@ func (ps *terminatingState) Execute(action vkv1.Action) error { // If any "alive" pods, still in Terminating phase if status.Terminating != 0 || status.Pending != 0 || status.Running != 0 { return false - } else { - status.State.Phase = vkv1.Terminated - return true } + status.State.Phase = vkv1.Terminated + return true + }) } diff --git a/pkg/controllers/job/state/util.go b/pkg/controllers/job/state/util.go index f1e33b65b6..6dc33e7016 100644 --- a/pkg/controllers/job/state/util.go +++ b/pkg/controllers/job/state/util.go @@ -20,9 +20,10 @@ import ( vkv1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1" ) -// The default number of retries. +//DefaultMaxRetry is the default number of retries. 
const DefaultMaxRetry int32 = 3 +//TotalTasks returns number of tasks in a given volcano job func TotalTasks(job *vkv1.Job) int32 { var rep int32 From 03f4f1890663f92a874d55682f1ea989974cf0aa Mon Sep 17 00:00:00 2001 From: Rajadeepan D Ramesh Date: Mon, 24 Jun 2019 11:34:02 +0530 Subject: [PATCH 13/26] Adding UT test cases to queue controller --- .../queue/queue_controller_test.go | 279 ++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 pkg/controllers/queue/queue_controller_test.go diff --git a/pkg/controllers/queue/queue_controller_test.go b/pkg/controllers/queue/queue_controller_test.go new file mode 100644 index 0000000000..073f9b4bbd --- /dev/null +++ b/pkg/controllers/queue/queue_controller_test.go @@ -0,0 +1,279 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package queue + +import ( + "testing" + + kbv1alpha1 "github.com/kubernetes-sigs/kube-batch/pkg/apis/scheduling/v1alpha1" + kubebatchclient "github.com/kubernetes-sigs/kube-batch/pkg/client/clientset/versioned/fake" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kubeclient "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/cache" +) + +func newFakeController() *Controller { + KubeBatchClientSet := kubebatchclient.NewSimpleClientset() + KubeClientSet := kubeclient.NewSimpleClientset() + + controller := NewQueueController(KubeClientSet, KubeBatchClientSet) + return controller +} + +func TestAddQueue(t *testing.T) { + testCases := []struct { + Name string + queue *kbv1alpha1.Queue + ExpectValue int + }{ + { + Name: "AddQueue", + queue: &kbv1alpha1.Queue{ + ObjectMeta: metav1.ObjectMeta{ + Name: "c1", + }, + Spec: kbv1alpha1.QueueSpec{ + Weight: 1, + }, + }, + ExpectValue: 1, + }, + } + + for i, testcase := range testCases { + c := newFakeController() + + c.addQueue(testcase.queue) + + if testcase.ExpectValue != c.queue.Len() { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, c.queue.Len()) + } + } +} + +func TestDeleteQueue(t *testing.T) { + testCases := []struct { + Name string + queue *kbv1alpha1.Queue + ExpectValue bool + }{ + { + Name: "DeleteQueue", + queue: &kbv1alpha1.Queue{ + ObjectMeta: metav1.ObjectMeta{ + Name: "c1", + }, + Spec: kbv1alpha1.QueueSpec{ + Weight: 1, + }, + }, + ExpectValue: false, + }, + } + + for i, testcase := range testCases { + c := newFakeController() + c.podGroups[testcase.queue.Name] = make(map[string]struct{}) + + c.deleteQueue(testcase.queue) + + if _, ok := c.podGroups[testcase.queue.Name]; ok != testcase.ExpectValue { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, ok) + } + } + +} + +func TestAddPodGroup(t *testing.T) { + namespace := "c1" + + testCases := []struct { + Name string + podGroup *kbv1alpha1.PodGroup + ExpectValue int + }{ + { + Name: "addpodgroup", + podGroup: &kbv1alpha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: namespace, + }, + Spec: kbv1alpha1.PodGroupSpec{ + Queue: "c1", + }, + }, + 
ExpectValue: 1, + }, + } + + for i, testcase := range testCases { + c := newFakeController() + + c.addPodGroup(testcase.podGroup) + + if testcase.ExpectValue != c.queue.Len() { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, c.queue.Len()) + } + if testcase.ExpectValue != len(c.podGroups[testcase.podGroup.Spec.Queue]) { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, len(c.podGroups[testcase.podGroup.Spec.Queue])) + } + } + +} + +func TestDeletePodGroup(t *testing.T) { + namespace := "c1" + + testCases := []struct { + Name string + podGroup *kbv1alpha1.PodGroup + ExpectValue bool + }{ + { + Name: "deletepodgroup", + podGroup: &kbv1alpha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: namespace, + }, + Spec: kbv1alpha1.PodGroupSpec{ + Queue: "c1", + }, + }, + ExpectValue: false, + }, + } + + for i, testcase := range testCases { + c := newFakeController() + + key, _ := cache.MetaNamespaceKeyFunc(testcase.podGroup) + c.podGroups[testcase.podGroup.Spec.Queue] = make(map[string]struct{}) + c.podGroups[testcase.podGroup.Spec.Queue][key] = struct{}{} + + c.deletePodGroup(testcase.podGroup) + if _, ok := c.podGroups[testcase.podGroup.Spec.Queue][key]; ok != testcase.ExpectValue { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, ok) + } + } +} + +func TestUpdatePodGroup(t *testing.T) { + namespace := "c1" + + testCases := []struct { + Name string + podGroupold *kbv1alpha1.PodGroup + podGroupnew *kbv1alpha1.PodGroup + ExpectValue int + }{ + { + Name: "updatepodgroup", + podGroupold: &kbv1alpha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: namespace, + }, + Spec: kbv1alpha1.PodGroupSpec{ + Queue: "c1", + }, + Status: kbv1alpha1.PodGroupStatus{ + Phase: kbv1alpha1.PodGroupPending, + }, + }, + podGroupnew: &kbv1alpha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: namespace, + }, + Spec: kbv1alpha1.PodGroupSpec{ + Queue: "c1", + }, + Status: kbv1alpha1.PodGroupStatus{ + Phase: kbv1alpha1.PodGroupRunning, + }, + }, + ExpectValue: 1, + }, + } + + for i, testcase := range testCases { + c := newFakeController() + + c.updatePodGroup(testcase.podGroupold, testcase.podGroupnew) + + if testcase.ExpectValue != c.queue.Len() { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, c.queue.Len()) + } + } +} + +func TestSyncQueue(t *testing.T) { + namespace := "c1" + + testCases := []struct { + Name string + podGroup *kbv1alpha1.PodGroup + queue *kbv1alpha1.Queue + ExpectValue int32 + }{ + { + Name: "syncQueue", + podGroup: &kbv1alpha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: namespace, + }, + Spec: kbv1alpha1.PodGroupSpec{ + Queue: "c1", + }, + Status: kbv1alpha1.PodGroupStatus{ + Phase: kbv1alpha1.PodGroupPending, + }, + }, + queue: &kbv1alpha1.Queue{ + ObjectMeta: metav1.ObjectMeta{ + Name: "c1", + }, + Spec: kbv1alpha1.QueueSpec{ + Weight: 1, + }, + }, + ExpectValue: 1, + }, + } + + for i, testcase := range testCases { + c := newFakeController() + + key, _ := cache.MetaNamespaceKeyFunc(testcase.podGroup) + c.podGroups[testcase.podGroup.Spec.Queue] = make(map[string]struct{}) + c.podGroups[testcase.podGroup.Spec.Queue][key] = struct{}{} + + c.pgInformer.Informer().GetIndexer().Add(testcase.podGroup) + c.queueInformer.Informer().GetIndexer().Add(testcase.queue) + c.kbClient.SchedulingV1alpha1().Queues().Create(testcase.queue) + + err := 
c.syncQueue(testcase.queue.Name) + item, _ := c.kbClient.SchedulingV1alpha1().Queues().Get(testcase.queue.Name, metav1.GetOptions{}) + if err != nil && testcase.ExpectValue != item.Status.Pending { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, c.queue.Len()) + } + } + +} From d87d7359ca5fe2f43a2ead7856b3a672518a32ee Mon Sep 17 00:00:00 2001 From: Thandayuthapani Date: Mon, 24 Jun 2019 12:48:02 +0530 Subject: [PATCH 14/26] UT cases for pkg/controllers/cache package --- pkg/controllers/cache/cache_test.go | 846 ++++++++++++++++++++++++++++ 1 file changed, 846 insertions(+) create mode 100644 pkg/controllers/cache/cache_test.go diff --git a/pkg/controllers/cache/cache_test.go b/pkg/controllers/cache/cache_test.go new file mode 100644 index 0000000000..cbe010d8f5 --- /dev/null +++ b/pkg/controllers/cache/cache_test.go @@ -0,0 +1,846 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "fmt" + "testing" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "volcano.sh/volcano/pkg/apis/batch/v1alpha1" + "volcano.sh/volcano/pkg/controllers/apis" +) + +func TestJobCache_Add(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + JobsInCache map[string]*v1alpha1.Job + ExpectedVal error + }{ + { + Name: "Success case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + JobsInCache: nil, + ExpectedVal: nil, + }, + { + Name: "Error case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + }, + ExpectedVal: fmt.Errorf("duplicated jobInfo <%s/%s>", "test", "job1"), + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + err := jobCache.Add(testcase.Job) + if err != nil && testcase.ExpectedVal != nil && err.Error() != testcase.ExpectedVal.Error() { + t.Errorf("Expected Return Value to be: %s, but got: %s in case %d", testcase.ExpectedVal, err, i) + } + } +} + +func TestJobCache_GetStatus(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Job *v1alpha1.Job + JobsInCache map[string]*v1alpha1.Job + ExpectedVal v1alpha1.JobState + ExpectedErr error + }{ + { + Name: "Success Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + }, + ExpectedVal: v1alpha1.JobState{ + Phase: 
v1alpha1.Completed, + }, + ExpectedErr: nil, + }, + { + Name: "Error Case", + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + }, + JobsInCache: nil, + ExpectedVal: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + ExpectedErr: fmt.Errorf("failed to find job <%s/%s>", namespace, "job1"), + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + status, err := jobCache.GetStatus(fmt.Sprintf("%s/%s", testcase.Job.Namespace, testcase.Job.Name)) + if err != nil && testcase.ExpectedErr != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected to get: %s, but got: %s in case %d", testcase.ExpectedErr, err, i) + } + if status != nil && status.State.Phase != testcase.ExpectedVal.Phase { + t.Errorf("Expected Return Value to be: %s, but got: %s in case %d", testcase.ExpectedVal, status.State.Phase, i) + } + } +} + +func TestJobCache_Get(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + Key string + JobsInCache map[string]*v1alpha1.Job + ExpectedVal *apis.JobInfo + ExpectedErr error + }{ + { + Name: "Success Case", + Key: fmt.Sprintf("%s/%s", namespace, "job1"), + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + }, + ExpectedVal: &apis.JobInfo{ + Name: "job1", + Namespace: namespace, + Job: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + }, + ExpectedErr: nil, + }, + { + Name: "Error Case", + Key: fmt.Sprintf("%s/%s", namespace, "job1"), + JobsInCache: nil, + ExpectedVal: &apis.JobInfo{}, + ExpectedErr: fmt.Errorf("failed to find job <%s/%s>", namespace, "job1"), + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + job, err := jobCache.Get(testcase.Key) + if err != nil && testcase.ExpectedErr != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected to get: %s, but got: %s in case %d", testcase.ExpectedErr, err, i) + } + if job != nil && (job.Name != testcase.ExpectedVal.Name || job.Job.Name != testcase.ExpectedVal.Job.Name) { + fmt.Println(job.Job) + t.Errorf("Expected Return Value to be same but got different values in case %d", i) + } + } +} + +func TestJobCache_Update(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobsInCache map[string]*v1alpha1.Job + UpdatedJob *v1alpha1.Job + ExpectedErr error + }{ + { + Name: "Success Case", + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + UpdatedJob: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedErr: nil, + }, + { + Name: 
"Error Case", + JobsInCache: nil, + UpdatedJob: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedErr: fmt.Errorf("failed to find job <%s/%s>", namespace, "job1"), + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + err := jobCache.Update(testcase.UpdatedJob) + if err != nil && testcase.ExpectedErr != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected to get: %s, but got: %s in case %d", testcase.ExpectedErr, err, i) + } + if testcase.ExpectedErr == nil { + job, err := jobCache.Get(fmt.Sprintf("%s/%s", testcase.UpdatedJob.Namespace, testcase.UpdatedJob.Name)) + if err != nil { + t.Errorf("Expected Error not to have occured in case %d", i) + } + if job.Job.Status.State.Phase != testcase.UpdatedJob.Status.State.Phase { + t.Errorf("Error in updating Job, Expected: %s, but got: %s in case %d", testcase.UpdatedJob.Status.State.Phase, job.Job.Status.State.Phase, i) + } + } + } +} + +func TestJobCache_Delete(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobsInCache map[string]*v1alpha1.Job + DeleteJob *v1alpha1.Job + ExpectedErr error + }{ + { + Name: "Success Case", + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + DeleteJob: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedErr: nil, + }, + { + Name: "Error Case", + JobsInCache: nil, + DeleteJob: &v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Completed, + }, + }, + }, + ExpectedErr: fmt.Errorf("failed to find job <%s/%s>", namespace, "job1"), + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + err := jobCache.Delete(testcase.DeleteJob) + if err != nil && testcase.ExpectedErr != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected to get: %s, but got: %s in case %d", testcase.ExpectedErr, err, i) + } + if testcase.ExpectedErr == nil { + job, err := jobCache.Get(fmt.Sprintf("%s/%s", testcase.DeleteJob.Namespace, testcase.DeleteJob.Name)) + if err == nil { + t.Errorf("Expected Error to have occured in case %d", i) + } + if job != nil { + t.Errorf("Expected Job to be nil but got value in case %d", i) + } + } + } +} + +func TestJobCache_AddPod(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobsInCache map[string]*v1alpha1.Job + AddPod *v1.Pod + ExpectedErr error + }{ + { + Name: "Success Case", + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, 
+ }, + }, + }, + AddPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + ExpectedErr: nil, + }, + { + Name: "Error Case", + JobsInCache: nil, + AddPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + ExpectedErr: fmt.Errorf("failed to find job name of pod <%s/%s>", namespace, "pod1"), + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected error not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + err := jobCache.AddPod(testcase.AddPod) + if err != nil && testcase.ExpectedErr != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected Error to be: %s, but got: %s in case %d", testcase.ExpectedErr.Error(), err.Error(), i) + } + + if err == nil { + job, err := jobCache.Get(fmt.Sprintf("%s/%s", testcase.JobsInCache["job1"].Namespace, testcase.JobsInCache["job1"].Name)) + if err != nil { + t.Errorf("Expected Error not to occur while retrieving job from cache in case %d", i) + } + if err == nil { + if len(job.Pods) != 1 { + t.Errorf("Expected Len to be 1 but got %d in case %d", len(job.Pods), i) + } + } + } + } +} + +func TestJobCache_DeletePod(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobsInCache map[string]*v1alpha1.Job + AddPod map[string]*v1.Pod + DeletePod *v1.Pod + ExpectedErr error + }{ + { + Name: "Success Case", + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + AddPod: map[string]*v1.Pod{ + "pod1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + "pod2": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod2", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + }, + DeletePod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + ExpectedErr: nil, + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected error not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + for _, pod := range testcase.AddPod { + err := jobCache.AddPod(pod) + if err != nil { + t.Errorf("Expected Error not to occur when adding Pod in case %d", i) + } + } + + err := jobCache.DeletePod(testcase.DeletePod) + if err != nil && testcase.ExpectedErr != nil && err.Error() 
!= testcase.ExpectedErr.Error() { + t.Errorf("Expected Error to be: %s, but got: %s in case %d", testcase.ExpectedErr.Error(), err.Error(), i) + } + + if err == nil { + job, err := jobCache.Get(fmt.Sprintf("%s/%s", namespace, "job1")) + if err != nil { + t.Errorf("Expected Error not to have occured but got error: %s in case %d", err, i) + } + if len(job.Pods["task1"]) != 1 { + t.Errorf("Expected total pods to be 1, but got: %d in case %d", len(job.Pods["task1"]), i) + } + } + } +} + +func TestJobCache_UpdatePod(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobsInCache map[string]*v1alpha1.Job + AddPod map[string]*v1.Pod + UpdatePod *v1.Pod + ExpectedErr error + }{ + { + Name: "Success Case", + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + AddPod: map[string]*v1.Pod{ + "pod1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + "pod2": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod2", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + }, + UpdatePod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + }, + }, + ExpectedErr: nil, + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + for _, pod := range testcase.AddPod { + err := jobCache.AddPod(pod) + if err != nil { + t.Errorf("Expected Error not occur when adding Adding Pod in case %d", i) + } + } + + err := jobCache.UpdatePod(testcase.UpdatePod) + if err != nil && testcase.ExpectedErr != nil && err.Error() != testcase.ExpectedErr.Error() { + t.Errorf("Expected Error to be: %s, but got: %s in case %d", testcase.ExpectedErr.Error(), err.Error(), i) + } + + if err == nil { + job, err := jobCache.Get(fmt.Sprintf("%s/%s", namespace, "job1")) + if err != nil { + t.Errorf("Expected Error not to have occured but got error: %s in case %d", err, i) + } + for _, task := range job.Pods { + for _, pod := range task { + if pod.Name == testcase.UpdatePod.Name { + if pod.Status.Phase != testcase.UpdatePod.Status.Phase { + t.Errorf("Expected Pod status to be updated to %s, but got %s in case %d", testcase.UpdatePod.Status.Phase, pod.Status.Phase, i) + } + } + } + } + } + } +} + +func TestJobCache_TaskCompleted(t *testing.T) { + namespace := "test" + + testcases := []struct { + Name string + JobsInCache map[string]*v1alpha1.Job + AddPod map[string]*v1.Pod + ExpectedVal bool + }{ + { + Name: "Success Case", + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task1", + Replicas: 2, + }, + 
}, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + AddPod: map[string]*v1.Pod{ + "pod1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + }, + }, + "pod2": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod2", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + }, + }, + }, + ExpectedVal: true, + }, + { + Name: "False Case", + JobsInCache: map[string]*v1alpha1.Job{ + "job1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "job1", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task1", + Replicas: 2, + }, + }, + }, + Status: v1alpha1.JobStatus{ + State: v1alpha1.JobState{ + Phase: v1alpha1.Running, + }, + }, + }, + }, + AddPod: map[string]*v1.Pod{ + "pod1": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + }, + }, + "pod2": { + ObjectMeta: metav1.ObjectMeta{ + Name: "pod2", + Namespace: namespace, + Annotations: map[string]string{ + v1alpha1.JobNameKey: "job1", + v1alpha1.TaskSpecKey: "task1", + v1alpha1.JobVersion: "1", + }, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + }, + ExpectedVal: false, + }, + } + + for i, testcase := range testcases { + jobCache := New() + + for _, job := range testcase.JobsInCache { + err := jobCache.Add(job) + if err != nil { + t.Errorf("Expected not to occur while adding job, but got error: %s in case %d", err, i) + } + } + + for _, pod := range testcase.AddPod { + err := jobCache.AddPod(pod) + if err != nil { + t.Errorf("Expected Error not occur when adding Adding Pod in case %d", i) + } + } + + completed := jobCache.TaskCompleted(fmt.Sprintf("%s/%s", namespace, "job1"), "task1") + if completed != testcase.ExpectedVal { + t.Errorf("Expected Return Value to be: %t, but got: %t in case %d", testcase.ExpectedVal, completed, i) + } + } +} From 462b1d7308f4997df1d7ef513897eaa8578d1582 Mon Sep 17 00:00:00 2001 From: Thandayuthapani Date: Mon, 24 Jun 2019 12:53:57 +0530 Subject: [PATCH 15/26] Address Review Comments --- pkg/controllers/garbagecollector/garbagecollector_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controllers/garbagecollector/garbagecollector_test.go b/pkg/controllers/garbagecollector/garbagecollector_test.go index f431f7c560..698af4181f 100644 --- a/pkg/controllers/garbagecollector/garbagecollector_test.go +++ b/pkg/controllers/garbagecollector/garbagecollector_test.go @@ -160,7 +160,7 @@ func TestGarbageCollector_IsJobFinished(t *testing.T) { ExpectedVal bool }{ { - Name: "Success Case", + Name: "True Case", Job: &v1alpha1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: "job1", @@ -175,7 +175,7 @@ func TestGarbageCollector_IsJobFinished(t *testing.T) { ExpectedVal: true, }, { - Name: "Success Case", + Name: "False Case", Job: &v1alpha1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: "job1", From 9b405200773730625ef6b3ec86ce2e40af0f45d6 Mon Sep 17 00:00:00 2001 From: Rajadeepan D Ramesh Date: Mon, 24 Jun 2019 18:34:56 +0530 
Subject: [PATCH 16/26] Adding UT test cases to apis package --- pkg/controllers/apis/job_info_test.go | 226 ++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 pkg/controllers/apis/job_info_test.go diff --git a/pkg/controllers/apis/job_info_test.go b/pkg/controllers/apis/job_info_test.go new file mode 100644 index 0000000000..4a8d36db92 --- /dev/null +++ b/pkg/controllers/apis/job_info_test.go @@ -0,0 +1,226 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apis + +import ( + "fmt" + "testing" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + vkbatchv1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1" +) + +func TestAddPod(t *testing.T) { + namespace := "test" + name := "pod1" + + testCases := []struct { + Name string + jobinfo JobInfo + pod *v1.Pod + ExpectValue bool + ExpectErr string + }{ + { + Name: "AddPod", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(fmt.Sprintf("%v-%v", namespace, name)), + Name: name, + Namespace: namespace, + Labels: nil, + Annotations: map[string]string{vkbatchv1.JobNameKey: "job1", + vkbatchv1.JobVersion: "0", + vkbatchv1.TaskSpecKey: "task1"}, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "nginx", + Image: "nginx:latest", + }, + }, + }, + }, + jobinfo: JobInfo{ + Pods: make(map[string]map[string]*v1.Pod), + }, + ExpectValue: true, + ExpectErr: "duplicated pod", + }, + } + + for i, testcase := range testCases { + err := testcase.jobinfo.AddPod(testcase.pod) + if err != nil { + t.Fatalf("AddPod() error: %v", err) + } + + if _, ok := testcase.jobinfo.Pods["task1"][testcase.pod.Name]; ok != testcase.ExpectValue { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, ok) + } + + err = testcase.jobinfo.AddPod(testcase.pod) + + if err == nil { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectErr, nil) + } + } + +} + +func TestDeletePod(t *testing.T) { + namespace := "test" + name := "pod1" + + testCases := []struct { + Name string + jobinfo JobInfo + pod *v1.Pod + ExpectValue bool + }{ + { + Name: "DeletePod", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(fmt.Sprintf("%v-%v", namespace, name)), + Name: name, + Namespace: namespace, + Labels: nil, + Annotations: map[string]string{vkbatchv1.JobNameKey: "job1", + vkbatchv1.JobVersion: "0", + vkbatchv1.TaskSpecKey: "task1"}, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "nginx", + Image: "nginx:latest", + }, + }, + }, + }, + jobinfo: JobInfo{ + Pods: make(map[string]map[string]*v1.Pod), + }, + ExpectValue: false, + }, + } + + for i, testcase := range testCases { + + testcase.jobinfo.Pods["task1"] = make(map[string]*v1.Pod) + testcase.jobinfo.Pods["task1"][testcase.pod.Name] = testcase.pod + + err := testcase.jobinfo.DeletePod(testcase.pod) + if err != 
nil { + t.Fatalf("DeletePod() error: %v", err) + } + if _, ok := testcase.jobinfo.Pods["task1"][testcase.pod.Name]; ok != testcase.ExpectValue { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, ok) + } + } +} + +func TestUpdatePod(t *testing.T) { + namespace := "test" + name := "pod1" + + testCases := []struct { + Name string + jobinfo JobInfo + oldpod *v1.Pod + newpod *v1.Pod + ExpectValue v1.PodPhase + }{ + { + Name: "UpdatePod", + oldpod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(fmt.Sprintf("%v-%v", namespace, name)), + Name: name, + Namespace: namespace, + Labels: nil, + Annotations: map[string]string{vkbatchv1.JobNameKey: "job1", + vkbatchv1.JobVersion: "0", + vkbatchv1.TaskSpecKey: "task1"}, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "nginx", + Image: "nginx:latest", + }, + }, + }, + }, + newpod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(fmt.Sprintf("%v-%v", namespace, name)), + Name: name, + Namespace: namespace, + Labels: nil, + Annotations: map[string]string{vkbatchv1.JobNameKey: "job1", + vkbatchv1.JobVersion: "0", + vkbatchv1.TaskSpecKey: "task1"}, + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "nginx", + Image: "nginx:latest", + }, + }, + }, + }, + jobinfo: JobInfo{ + Pods: make(map[string]map[string]*v1.Pod), + }, + ExpectValue: v1.PodSucceeded, + }, + } + + for i, testcase := range testCases { + + testcase.jobinfo.Pods["task1"] = make(map[string]*v1.Pod) + testcase.jobinfo.Pods["task1"][testcase.oldpod.Name] = testcase.oldpod + + err := testcase.jobinfo.UpdatePod(testcase.newpod) + if err != nil { + t.Fatalf("UpdatePod() error: %v", err) + } + if val, ok := testcase.jobinfo.Pods["task1"][testcase.newpod.Name]; ok != true { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, true, ok) + } else if val.Status.Phase != v1.PodSucceeded { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, val.Status.Phase) + } + } +} From b10beba53337a017c657bc6a08fd3d96c2e0709e Mon Sep 17 00:00:00 2001 From: shivramsrivastava Date: Wed, 26 Jun 2019 14:55:25 +0530 Subject: [PATCH 17/26] Improving the code coverage for admission controller pkg --- pkg/admission/admit_job_test.go | 651 ++++++++++++++++++++++++++++++++ 1 file changed, 651 insertions(+) diff --git a/pkg/admission/admit_job_test.go b/pkg/admission/admit_job_test.go index 8b9a1f8aef..ee615edeee 100644 --- a/pkg/admission/admit_job_test.go +++ b/pkg/admission/admit_job_test.go @@ -34,6 +34,7 @@ func TestValidateExecution(t *testing.T) { namespace := "test" var invTTL int32 = -1 + var policyExitCode int32 = -1 testCases := []struct { Name string @@ -288,6 +289,656 @@ func TestValidateExecution(t *testing.T) { ret: "'ttlSecondsAfterFinished' cannot be less than zero", ExpectErr: true, }, + // min-MinAvailable less than zero + { + Name: "minAvailable-lessThanZero", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "minAvailable-lessThanZero", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: -1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + }, + }, 
+ reviewResponse: v1beta1.AdmissionResponse{Allowed: false}, + ret: "'minAvailable' cannot be less than zero.", + ExpectErr: true, + }, + // maxretry less than zero + { + Name: "maxretry-lessThanZero", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "maxretry-lessThanZero", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + MaxRetry: -1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: false}, + ret: "'maxRetry' cannot be less than zero.", + ExpectErr: true, + }, + // no task specified in the job + { + Name: "no-task", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "no-task", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{}, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: false}, + ret: "No task specified in job spec", + ExpectErr: true, + }, + // replica set less than zero + { + Name: "replica-lessThanZero", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "replica-lessThanZero", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: -1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: false}, + ret: "'replicas' is not set positive in task: task-1;", + ExpectErr: true, + }, + // task name error + { + Name: "nonDNS-task", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "replica-lessThanZero", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "Task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: false}, + ret: "[a DNS-1123 label must consist of lower case alphanumeric characters or '-', and " + + "must start and end with an alphanumeric character (e.g. 
'my-name', " + + "or '123-abc', regex used for validation is '[a-z0-9]([-a-z0-9]*[a-z0-9])?')];", + ExpectErr: true, + }, + // Policy Event with exit code + { + Name: "job-policy-withExitCode", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job-policy-withExitCode", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Event: v1alpha1.PodFailedEvent, + Action: v1alpha1.AbortJobAction, + ExitCode: &policyExitCode, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "must not specify event and exitCode simultaneously", + ExpectErr: true, + }, + // Both policy event and exit code are nil + { + Name: "policy-noEvent-noExCode", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "policy-noEvent-noExCode", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Action: v1alpha1.AbortJobAction, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "either event and exitCode should be specified", + ExpectErr: true, + }, + // invalid policy event + { + Name: "invalid-policy-event", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "invalid-policy-event", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Event: v1alpha1.Event("someFakeEvent"), + Action: v1alpha1.AbortJobAction, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "invalid policy event", + ExpectErr: true, + }, + // invalid policy action + { + Name: "invalid-policy-action", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "invalid-policy-action", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Event: v1alpha1.PodEvictedEvent, + Action: v1alpha1.Action("someFakeAction"), + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "invalid policy action", + ExpectErr: true, + }, + // policy exit-code zero + { + Name: "policy-extcode-zero", + Job: v1alpha1.Job{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "policy-extcode-zero", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Action: v1alpha1.AbortJobAction, + ExitCode: func(i int32) *int32 { + return &i + }(int32(0)), + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "0 is not a valid error code", + ExpectErr: true, + }, + // duplicate policy exit-code + { + Name: "duplicate-exitcode", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "duplicate-exitcode", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + ExitCode: func(i int32) *int32 { + return &i + }(int32(1)), + }, + { + ExitCode: func(i int32) *int32 { + return &i + }(int32(1)), + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "duplicate exitCode 1", + ExpectErr: true, + }, + // Policy with any event and other events + { + Name: "job-policy-withExitCode", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job-policy-withExitCode", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Event: v1alpha1.AnyEvent, + Action: v1alpha1.AbortJobAction, + }, + { + Event: v1alpha1.PodFailedEvent, + Action: v1alpha1.RestartJobAction, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "if there's * here, no other policy should be here", + ExpectErr: true, + }, + // invalid mount volume + { + Name: "invalid-mount-volume", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "invalid-mount-volume", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Event: v1alpha1.AnyEvent, + Action: v1alpha1.AbortJobAction, + }, + }, + Volumes: []v1alpha1.VolumeSpec{ + { + MountPath: "", + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: " mountPath is required;", + ExpectErr: true, + }, + // duplicate mount volume + { + Name: "duplicate-mount-volume", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "duplicate-mount-volume", 
+ Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Event: v1alpha1.AnyEvent, + Action: v1alpha1.AbortJobAction, + }, + }, + Volumes: []v1alpha1.VolumeSpec{ + { + MountPath: "/var", + }, + { + MountPath: "/var", + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: " duplicated mountPath: /var;", + ExpectErr: true, + }, + // task Policy with any event and other events + { + Name: "taskpolicy-withAnyandOthrEvent", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "taskpolicy-withAnyandOthrEvent", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "default", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + Policies: []v1alpha1.LifecyclePolicy{ + { + Event: v1alpha1.AnyEvent, + Action: v1alpha1.AbortJobAction, + }, + { + Event: v1alpha1.PodFailedEvent, + Action: v1alpha1.RestartJobAction, + }, + }, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "if there's * here, no other policy should be here", + ExpectErr: true, + }, + // job with no queue created + { + Name: "job-with-noQueue", + Job: v1alpha1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job-with-noQueue", + Namespace: namespace, + }, + Spec: v1alpha1.JobSpec{ + MinAvailable: 1, + Queue: "jobQueue", + Tasks: []v1alpha1.TaskSpec{ + { + Name: "task-1", + Replicas: 1, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"name": "test"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "fake-name", + Image: "busybox:1.24", + }, + }, + }, + }, + }, + }, + }, + }, + reviewResponse: v1beta1.AdmissionResponse{Allowed: true}, + ret: "Job not created with error: ", + ExpectErr: true, + }, } for _, testCase := range testCases { From e318bd076e993a4a2cb0ab662c07f3d85b6d1fb9 Mon Sep 17 00:00:00 2001 From: wangyuqing4 Date: Thu, 30 May 2019 11:05:59 +0800 Subject: [PATCH 18/26] fix vk-controller cache --- pkg/controllers/job/job_controller.go | 8 ++ pkg/controllers/job/job_controller_actions.go | 7 ++ pkg/controllers/job/job_controller_resync.go | 84 +++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 pkg/controllers/job/job_controller_resync.go diff --git a/pkg/controllers/job/job_controller.go b/pkg/controllers/job/job_controller.go index c82c875355..66e600a0db 100644 --- a/pkg/controllers/job/job_controller.go +++ b/pkg/controllers/job/job_controller.go @@ -18,6 +18,7 @@ package job import ( "fmt" + "sync" "github.com/golang/glog" @@ -100,6 +101,9 @@ type Controller struct { //Job Event recorder recorder record.EventRecorder priorityClasses map[string]*v1beta1.PriorityClass + + sync.Mutex + errTasks workqueue.RateLimitingInterface } // NewJobController create new Job Controller @@ -122,6 +126,7 @@ func NewJobController( queue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), 
commandQueue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()), cache: jobcache.New(), + errTasks: newRateLimitingQueue(), recorder: recorder, priorityClasses: make(map[string]*v1beta1.PriorityClass), } @@ -204,6 +209,9 @@ func (cc *Controller) Run(stopCh <-chan struct{}) { go cc.cache.Run(stopCh) + // Re-sync error tasks. + go wait.Until(cc.processResyncTask, 0, stopCh) + glog.Infof("JobController is running ...... ") } diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index 9aa1fbac6b..994b4e337c 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -72,6 +72,7 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseM } // record the err, and then collect the pod info like retained pod errs = append(errs, err) + cc.resyncTask(pod) } switch pod.Status.Phase { @@ -271,6 +272,11 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt pod.Name, job.Name, err) creationErrs = append(creationErrs, err) } else { + if err != nil && apierrors.IsAlreadyExists(err) { + cc.resyncTask(pod) + } + + // TODO: maybe not pending status, maybe unknown. pending++ glog.V(3).Infof("Created Task <%s> of Job <%s/%s>", pod.Name, job.Namespace, job.Name) @@ -298,6 +304,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt glog.Errorf("Failed to delete pod %s for Job %s, err %#v", pod.Name, job.Name, err) deletionErrs = append(deletionErrs, err) + cc.resyncTask(pod) } else { glog.V(3).Infof("Deleted Task <%s> of Job <%s/%s>", pod.Name, job.Namespace, job.Name) diff --git a/pkg/controllers/job/job_controller_resync.go b/pkg/controllers/job/job_controller_resync.go new file mode 100644 index 0000000000..ed124849bf --- /dev/null +++ b/pkg/controllers/job/job_controller_resync.go @@ -0,0 +1,84 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package job + +import ( + "fmt" + "time" + + "golang.org/x/time/rate" + + "github.com/golang/glog" + + "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/workqueue" +) + +func newRateLimitingQueue() workqueue.RateLimitingInterface { + return workqueue.NewRateLimitingQueue(workqueue.NewMaxOfRateLimiter( + workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 180*time.Second), + // 10 qps, 100 bucket size. 
This is only for retry speed and its only the overall factor (not per item) + &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, + )) +} + +func (cc *Controller) processResyncTask() { + obj, shutdown := cc.errTasks.Get() + if shutdown { + return + } + + defer cc.errTasks.Done(obj) + + task, ok := obj.(*v1.Pod) + if !ok { + glog.Errorf("failed to convert %v to *v1.Pod", obj) + return + } + + if err := cc.syncTask(task); err != nil { + glog.Errorf("Failed to sync pod <%v/%v>, retry it.", task.Namespace, task.Name) + cc.resyncTask(task) + } +} + +func (cc *Controller) syncTask(oldTask *v1.Pod) error { + cc.Mutex.Lock() + defer cc.Mutex.Unlock() + + newPod, err := cc.kubeClients.CoreV1().Pods(oldTask.Namespace).Get(oldTask.Name, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + if err := cc.cache.DeletePod(oldTask); err != nil { + glog.Errorf("failed to delete cache pod <%v/%v>, err %v.", oldTask.Namespace, oldTask.Name, err) + return err + } + glog.V(3).Infof("Pod <%v/%v> was deleted, removed from cache.", oldTask.Namespace, oldTask.Name) + + return nil + } + return fmt.Errorf("failed to get Pod <%v/%v>: err %v", oldTask.Namespace, oldTask.Name, err) + } + + return cc.cache.UpdatePod(newPod) +} + +func (cc *Controller) resyncTask(task *v1.Pod) { + cc.errTasks.AddRateLimited(task) +} From 56fa5f266259f9c6725f695cd068dfeb97ee146b Mon Sep 17 00:00:00 2001 From: wangyuqing4 Date: Mon, 3 Jun 2019 19:16:42 +0800 Subject: [PATCH 19/26] fix pc panic --- pkg/controllers/job/job_controller_actions.go | 3 +++ pkg/controllers/job/job_controller_handler.go | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index 994b4e337c..773456eb0d 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -491,6 +491,9 @@ func (cc *Controller) deleteJobPod(jobName string, pod *v1.Pod) error { } func (cc *Controller) calcPGMinResources(job *vkv1.Job) *v1.ResourceList { + cc.Mutex.Lock() + defer cc.Mutex.Unlock() + // sort task by priorityClasses var tasksPriority TasksPriority for index := range job.Spec.Tasks { diff --git a/pkg/controllers/job/job_controller_handler.go b/pkg/controllers/job/job_controller_handler.go index 8df166507a..0f521ee9aa 100644 --- a/pkg/controllers/job/job_controller_handler.go +++ b/pkg/controllers/job/job_controller_handler.go @@ -394,6 +394,9 @@ func (cc *Controller) addPriorityClass(obj interface{}) { return } + cc.Mutex.Lock() + defer cc.Mutex.Unlock() + cc.priorityClasses[pc.Name] = pc return } @@ -404,6 +407,9 @@ func (cc *Controller) deletePriorityClass(obj interface{}) { return } + cc.Mutex.Lock() + defer cc.Mutex.Unlock() + delete(cc.priorityClasses, pc.Name) return } From 78b194c30a389518351d811e8f888fc232e79c12 Mon Sep 17 00:00:00 2001 From: wangyuqing4 Date: Wed, 12 Jun 2019 16:43:47 +0800 Subject: [PATCH 20/26] optimize resyncTask --- pkg/controllers/job/job_controller_resync.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pkg/controllers/job/job_controller_resync.go b/pkg/controllers/job/job_controller_resync.go index ed124849bf..80fb8f4546 100644 --- a/pkg/controllers/job/job_controller_resync.go +++ b/pkg/controllers/job/job_controller_resync.go @@ -44,6 +44,12 @@ func (cc *Controller) processResyncTask() { return } + // one task only resync 10 times + if cc.errTasks.NumRequeues(obj) > 10 { + cc.errTasks.Forget(obj) + return + } + defer 
cc.errTasks.Done(obj) task, ok := obj.(*v1.Pod) @@ -53,7 +59,7 @@ func (cc *Controller) processResyncTask() { } if err := cc.syncTask(task); err != nil { - glog.Errorf("Failed to sync pod <%v/%v>, retry it.", task.Namespace, task.Name) + glog.Errorf("Failed to sync pod <%v/%v>, retry it, err %v", task.Namespace, task.Name, err) cc.resyncTask(task) } } From cff2aee47bc72718972f49e9d855790677cb7786 Mon Sep 17 00:00:00 2001 From: Rajadeepan D Ramesh Date: Wed, 26 Jun 2019 19:18:58 +0530 Subject: [PATCH 21/26] Adding UT for cli job package --- pkg/cli/job/delete_test.go | 65 +++++++++++++++++++++++++++++++ pkg/cli/job/list_test.go | 64 +++++++++++++++++++++++++++++++ pkg/cli/job/resume_test.go | 76 +++++++++++++++++++++++++++++++++++++ pkg/cli/job/run_test.go | 63 ++++++++++++++++++++++++++++++ pkg/cli/job/suspend_test.go | 76 +++++++++++++++++++++++++++++++++++++ pkg/cli/job/view_test.go | 65 +++++++++++++++++++++++++++++++ 6 files changed, 409 insertions(+) create mode 100644 pkg/cli/job/delete_test.go create mode 100644 pkg/cli/job/list_test.go create mode 100644 pkg/cli/job/resume_test.go create mode 100644 pkg/cli/job/run_test.go create mode 100644 pkg/cli/job/suspend_test.go create mode 100644 pkg/cli/job/view_test.go diff --git a/pkg/cli/job/delete_test.go b/pkg/cli/job/delete_test.go new file mode 100644 index 0000000000..03b98e112c --- /dev/null +++ b/pkg/cli/job/delete_test.go @@ -0,0 +1,65 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package job + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + v1alpha1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1" +) + +func TestDeleteJobJob(t *testing.T) { + response := v1alpha1.Job{} + response.Name = "testJob" + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(response) + if err == nil { + w.Write(val) + } + + }) + + server := httptest.NewServer(handler) + defer server.Close() + + deleteJobFlags.Master = server.URL + deleteJobFlags.Namespace = "test" + deleteJobFlags.JobName = "testJob" + + testCases := []struct { + Name string + ExpectValue error + }{ + { + Name: "DeleteJob", + ExpectValue: nil, + }, + } + + for i, testcase := range testCases { + err := DeleteJob() + if err != nil { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, err) + } + } + +} diff --git a/pkg/cli/job/list_test.go b/pkg/cli/job/list_test.go new file mode 100644 index 0000000000..c2086e285f --- /dev/null +++ b/pkg/cli/job/list_test.go @@ -0,0 +1,64 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package job + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + v1alpha1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1" +) + +func TestListJob(t *testing.T) { + response := v1alpha1.JobList{} + response.Items = append(response.Items, v1alpha1.Job{}) + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(response) + if err == nil { + w.Write(val) + } + + }) + + server := httptest.NewServer(handler) + defer server.Close() + + listJobFlags.Master = server.URL + listJobFlags.Namespace = "test" + + testCases := []struct { + Name string + ExpectValue error + }{ + { + Name: "ListJob", + ExpectValue: nil, + }, + } + + for i, testcase := range testCases { + err := ListJobs() + if err != nil { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, err) + } + } + +} diff --git a/pkg/cli/job/resume_test.go b/pkg/cli/job/resume_test.go new file mode 100644 index 0000000000..1b81621e29 --- /dev/null +++ b/pkg/cli/job/resume_test.go @@ -0,0 +1,76 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package job + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + v1alpha1batch "volcano.sh/volcano/pkg/apis/batch/v1alpha1" + v1alpha1 "volcano.sh/volcano/pkg/apis/bus/v1alpha1" +) + +func TestResumeJob(t *testing.T) { + responsecommand := v1alpha1.Command{} + responsejob := v1alpha1batch.Job{} + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "command") { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(responsecommand) + if err == nil { + w.Write(val) + } + + } else { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(responsejob) + if err == nil { + w.Write(val) + } + + } + }) + + server := httptest.NewServer(handler) + defer server.Close() + + resumeJobFlags.Master = server.URL + resumeJobFlags.Namespace = "test" + resumeJobFlags.JobName = "testjob" + + testCases := []struct { + Name string + ExpectValue error + }{ + { + Name: "ResumeJob", + ExpectValue: nil, + }, + } + + for i, testcase := range testCases { + err := ResumeJob() + if err != nil { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, err) + } + } + +} diff --git a/pkg/cli/job/run_test.go b/pkg/cli/job/run_test.go new file mode 100644 index 0000000000..8594d73482 --- /dev/null +++ b/pkg/cli/job/run_test.go @@ -0,0 +1,63 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package job + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + v1alpha1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1" +) + +func TestCreateJob(t *testing.T) { + response := v1alpha1.Job{} + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(response) + if err == nil { + w.Write(val) + } + + }) + + server := httptest.NewServer(handler) + defer server.Close() + + launchJobFlags.Master = server.URL + launchJobFlags.Namespace = "test" + + testCases := []struct { + Name string + ExpectValue error + }{ + { + Name: "CreateJob", + ExpectValue: nil, + }, + } + + for i, testcase := range testCases { + err := RunJob() + if err != nil { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, err) + } + } + +} diff --git a/pkg/cli/job/suspend_test.go b/pkg/cli/job/suspend_test.go new file mode 100644 index 0000000000..b25daac72a --- /dev/null +++ b/pkg/cli/job/suspend_test.go @@ -0,0 +1,76 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package job + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + v1alpha1batch "volcano.sh/volcano/pkg/apis/batch/v1alpha1" + v1alpha1 "volcano.sh/volcano/pkg/apis/bus/v1alpha1" +) + +func TestSuspendJobJob(t *testing.T) { + responsecommand := v1alpha1.Command{} + responsejob := v1alpha1batch.Job{} + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "command") { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(responsecommand) + if err == nil { + w.Write(val) + } + + } else { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(responsejob) + if err == nil { + w.Write(val) + } + + } + }) + + server := httptest.NewServer(handler) + defer server.Close() + + suspendJobFlags.Master = server.URL + suspendJobFlags.Namespace = "test" + suspendJobFlags.JobName = "testjob" + + testCases := []struct { + Name string + ExpectValue error + }{ + { + Name: "SuspendJob", + ExpectValue: nil, + }, + } + + for i, testcase := range testCases { + err := SuspendJob() + if err != nil { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, err) + } + } + +} diff --git a/pkg/cli/job/view_test.go b/pkg/cli/job/view_test.go new file mode 100644 index 0000000000..9d279b8cb3 --- /dev/null +++ b/pkg/cli/job/view_test.go @@ -0,0 +1,65 @@ +/* +Copyright 2019 The Volcano Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package job + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + v1alpha1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1" +) + +func TestViewJob(t *testing.T) { + response := v1alpha1.Job{} + response.Name = "testJob" + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + val, err := json.Marshal(response) + if err == nil { + w.Write(val) + } + + }) + + server := httptest.NewServer(handler) + defer server.Close() + + viewJobFlags.Master = server.URL + viewJobFlags.Namespace = "test" + viewJobFlags.JobName = "testJob" + + testCases := []struct { + Name string + ExpectValue error + }{ + { + Name: "viewJob", + ExpectValue: nil, + }, + } + + for i, testcase := range testCases { + err := ViewJob() + if err != nil { + t.Errorf("case %d (%s): expected: %v, got %v ", i, testcase.Name, testcase.ExpectValue, err) + } + } + +} From f7b4c8e125735c34d88dea4c95ffc46fa84bdaab Mon Sep 17 00:00:00 2001 From: Klaus Ma Date: Thu, 27 Jun 2019 14:23:50 +0800 Subject: [PATCH 22/26] KubeCon 2019 China Demo. 
Signed-off-by: Klaus Ma --- .../kubecon-2019-china/drf/nginx-1.yaml | 26 +++++ .../kubecon-2019-china/drf/nginx-2.yaml | 26 +++++ .../samples/kubecon-2019-china/drf/nginx.yaml | 25 ++++ .../kubecon-2019-china/gang/mpi-example.yaml | 77 +++++++++++++ .../kubecon-2019-china/gang/nginx.yaml | 25 ++++ .../lm-horovod-tf-mnist-v0.5.yaml | 73 ++++++++++++ .../mpi-sample/mpi-example.yaml | 81 +++++++++++++ .../node-selector/nginx.yaml | 28 +++++ .../kubecon-2019-china/scripts/node-info.go | 93 +++++++++++++++ .../kubecon-2019-china/scripts/watch.sh | 41 +++++++ .../spark-sample/remove-all.sh | 4 + .../spark-sample/spark-pi.yaml | 69 +++++++++++ .../task-priority/elastic-job.yaml | 38 +++++++ .../task-priority/high-priority.yaml | 5 + .../task-priority/nginx.yaml | 25 ++++ .../task-priority/normal-priority.yaml | 5 + .../tf-sample/tf-example.yaml | 107 ++++++++++++++++++ 17 files changed, 748 insertions(+) create mode 100644 docs/samples/kubecon-2019-china/drf/nginx-1.yaml create mode 100644 docs/samples/kubecon-2019-china/drf/nginx-2.yaml create mode 100644 docs/samples/kubecon-2019-china/drf/nginx.yaml create mode 100644 docs/samples/kubecon-2019-china/gang/mpi-example.yaml create mode 100644 docs/samples/kubecon-2019-china/gang/nginx.yaml create mode 100644 docs/samples/kubecon-2019-china/horovod-sample/lm-horovod-tf-mnist-v0.5.yaml create mode 100644 docs/samples/kubecon-2019-china/mpi-sample/mpi-example.yaml create mode 100644 docs/samples/kubecon-2019-china/node-selector/nginx.yaml create mode 100644 docs/samples/kubecon-2019-china/scripts/node-info.go create mode 100644 docs/samples/kubecon-2019-china/scripts/watch.sh create mode 100644 docs/samples/kubecon-2019-china/spark-sample/remove-all.sh create mode 100644 docs/samples/kubecon-2019-china/spark-sample/spark-pi.yaml create mode 100644 docs/samples/kubecon-2019-china/task-priority/elastic-job.yaml create mode 100644 docs/samples/kubecon-2019-china/task-priority/high-priority.yaml create mode 100644 docs/samples/kubecon-2019-china/task-priority/nginx.yaml create mode 100644 docs/samples/kubecon-2019-china/task-priority/normal-priority.yaml create mode 100644 docs/samples/kubecon-2019-china/tf-sample/tf-example.yaml diff --git a/docs/samples/kubecon-2019-china/drf/nginx-1.yaml b/docs/samples/kubecon-2019-china/drf/nginx-1.yaml new file mode 100644 index 0000000000..b5ef601d53 --- /dev/null +++ b/docs/samples/kubecon-2019-china/drf/nginx-1.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: nginx-1 + labels: + app: nginx-1 +spec: + # modify replicas according to your case + replicas: 8 + selector: + matchLabels: + app: nginx-1 + template: + metadata: + labels: + app: nginx-1 + spec: + schedulerName: volcano + containers: + - name: nginx-1 + image: nginx + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" diff --git a/docs/samples/kubecon-2019-china/drf/nginx-2.yaml b/docs/samples/kubecon-2019-china/drf/nginx-2.yaml new file mode 100644 index 0000000000..5ed48f7bce --- /dev/null +++ b/docs/samples/kubecon-2019-china/drf/nginx-2.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: nginx-2 + labels: + app: nginx-2 +spec: + # modify replicas according to your case + replicas: 8 + selector: + matchLabels: + app: nginx-2 + template: + metadata: + labels: + app: nginx-2 + spec: + schedulerName: volcano + containers: + - name: nginx-2 + image: nginx + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" diff --git a/docs/samples/kubecon-2019-china/drf/nginx.yaml 
b/docs/samples/kubecon-2019-china/drf/nginx.yaml new file mode 100644 index 0000000000..762476b498 --- /dev/null +++ b/docs/samples/kubecon-2019-china/drf/nginx.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: nginx + labels: + app: nginx +spec: + # modify replicas according to your case + replicas: 8 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" diff --git a/docs/samples/kubecon-2019-china/gang/mpi-example.yaml b/docs/samples/kubecon-2019-china/gang/mpi-example.yaml new file mode 100644 index 0000000000..d5b94d9aa4 --- /dev/null +++ b/docs/samples/kubecon-2019-china/gang/mpi-example.yaml @@ -0,0 +1,77 @@ +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job +metadata: + name: lm-mpi-job + labels: + # 根据业务需要设置作业类型 + "volcano.sh/job-type": "MPI" +spec: + # 设置最小需要的服务 (小于总replicas数) + minAvailable: 4 + schedulerName: volcano + plugins: + # 提供 ssh 免密认证 + ssh: [] + # 提供运行作业所需要的网络信息,hosts文件,headless service等 + svc: [] + # 如果有pod被 杀死,重启整个作业 + policies: + - event: PodEvicted + action: RestartJob + tasks: + - replicas: 1 + name: mpimaster + # 当 mpiexec 结束,认识整个mpi作业结束 + policies: + - event: TaskCompleted + action: CompleteJob + template: + spec: + # Volcano 的信息会统一放到 /etc/volcano 目录下 + containers: + - command: + - /bin/sh + - -c + - | + MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`; + mkdir -p /var/run/sshd; /usr/sbin/sshd; + mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world; + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1 + name: mpimaster + ports: + - containerPort: 22 + name: mpijob-port + workingDir: /home + resources: + requests: + cpu: "500m" + limits: + cpu: "500m" + restartPolicy: OnFailure + imagePullSecrets: + - name: default-secret + - replicas: 3 + name: mpiworker + template: + spec: + containers: + - command: + - /bin/sh + - -c + - | + mkdir -p /var/run/sshd; /usr/sbin/sshd -D; + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1 + name: mpiworker + ports: + - containerPort: 22 + name: mpijob-port + workingDir: /home + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" + restartPolicy: OnFailure + imagePullSecrets: + - name: default-secret + diff --git a/docs/samples/kubecon-2019-china/gang/nginx.yaml b/docs/samples/kubecon-2019-china/gang/nginx.yaml new file mode 100644 index 0000000000..6c95274a50 --- /dev/null +++ b/docs/samples/kubecon-2019-china/gang/nginx.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: nginx + labels: + app: nginx +spec: + # modify replicas according to your case + replicas: 6 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" diff --git a/docs/samples/kubecon-2019-china/horovod-sample/lm-horovod-tf-mnist-v0.5.yaml b/docs/samples/kubecon-2019-china/horovod-sample/lm-horovod-tf-mnist-v0.5.yaml new file mode 100644 index 0000000000..09df4b8698 --- /dev/null +++ b/docs/samples/kubecon-2019-china/horovod-sample/lm-horovod-tf-mnist-v0.5.yaml @@ -0,0 +1,73 @@ +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job +metadata: + name: lm-horovod-job + labels: + "volcano.sh/job-type": Horovod +spec: + minAvailable: 4 + schedulerName: volcano + plugins: + ssh: [] + svc: [] + # 如果有pod被 杀死,重启整个作业 + policies: + - event: 
PodEvicted + action: RestartJob + tasks: + - replicas: 1 + name: master + policies: + - event: TaskCompleted + action: CompleteJob + template: + spec: + containers: + - command: + - /bin/sh + - -c + - | + WORKER_HOST=`cat /etc/volcano/worker.host | tr "\n" ","`; + mkdir -p /var/run/sshd; /usr/sbin/sshd; + mpiexec --allow-run-as-root --host ${WORKER_HOST} -np 3 python tensorflow_mnist_lm.py; + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5 + name: master + ports: + - containerPort: 22 + name: job-port + resources: + requests: + cpu: "500m" + memory: "1024Mi" + limits: + cpu: "500m" + memory: "1024Mi" + restartPolicy: OnFailure + imagePullSecrets: + - name: default-secret + - replicas: 3 + name: worker + template: + spec: + containers: + - command: + - /bin/sh + - -c + - | + mkdir -p /var/run/sshd; /usr/sbin/sshd -D; + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5 + name: worker + ports: + - containerPort: 22 + name: job-port + resources: + requests: + cpu: "1000m" + memory: "2048Mi" + limits: + cpu: "1000m" + memory: "2048Mi" + restartPolicy: OnFailure + imagePullSecrets: + - name: default-secret +--- diff --git a/docs/samples/kubecon-2019-china/mpi-sample/mpi-example.yaml b/docs/samples/kubecon-2019-china/mpi-sample/mpi-example.yaml new file mode 100644 index 0000000000..de5cfc0826 --- /dev/null +++ b/docs/samples/kubecon-2019-china/mpi-sample/mpi-example.yaml @@ -0,0 +1,81 @@ +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job +metadata: + name: lm-mpi-job + labels: + # 根据业务需要设置作业类型 + "volcano.sh/job-type": "MPI" +spec: + # 设置最小需要的服务 (小于总replicas数) + minAvailable: 3 + schedulerName: volcano + plugins: + # 提供 ssh 免密认证 + ssh: [] + # 提供运行作业所需要的网络信息,hosts文件,headless service等 + svc: [] + # 如果有pod被 杀死,重启整个作业 + policies: + - event: PodEvicted + action: RestartJob + tasks: + - replicas: 1 + name: mpimaster + # 当 mpiexec 结束,认识整个mpi作业结束 + policies: + - event: TaskCompleted + action: CompleteJob + template: + spec: + # Volcano 的信息会统一放到 /etc/volcano 目录下 + containers: + - command: + - /bin/sh + - -c + - | + MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`; + mkdir -p /var/run/sshd; /usr/sbin/sshd; + mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world; + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1 + name: mpimaster + ports: + - containerPort: 22 + name: mpijob-port + workingDir: /home + resources: + requests: + cpu: "500m" + memory: "1024Mi" + limits: + cpu: "500m" + memory: "1024Mi" + restartPolicy: OnFailure + imagePullSecrets: + - name: default-secret + - replicas: 2 + name: mpiworker + template: + spec: + containers: + - command: + - /bin/sh + - -c + - | + mkdir -p /var/run/sshd; /usr/sbin/sshd -D; + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1 + name: mpiworker + ports: + - containerPort: 22 + name: mpijob-port + workingDir: /home + resources: + requests: + cpu: "1024m" + memory: "2048Mi" + limits: + cpu: "1024m" + memory: "2048Mi" + restartPolicy: OnFailure + imagePullSecrets: + - name: default-secret + diff --git a/docs/samples/kubecon-2019-china/node-selector/nginx.yaml b/docs/samples/kubecon-2019-china/node-selector/nginx.yaml new file mode 100644 index 0000000000..82501f8a2b --- /dev/null +++ b/docs/samples/kubecon-2019-china/node-selector/nginx.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: nginx + labels: + app: nginx +spec: + replicas: 8 + selector: + matchLabels: + app: nginx + template: + metadata: + 
labels: + app: nginx + spec: + schedulerName: volcano + # Volcano Scheduler already pass conformance test!!! + nodeSelector: + "kubernetes.io/hostname": "192.168.23.24" + containers: + - name: nginx + image: nginx + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" diff --git a/docs/samples/kubecon-2019-china/scripts/node-info.go b/docs/samples/kubecon-2019-china/scripts/node-info.go new file mode 100644 index 0000000000..98268064b5 --- /dev/null +++ b/docs/samples/kubecon-2019-china/scripts/node-info.go @@ -0,0 +1,93 @@ +package main + +import ( + "fmt" + v1 "k8s.io/api/core/v1" + + "github.com/kubernetes-sigs/kube-batch/pkg/scheduler/api" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +func main() { + config, err := clientcmd.BuildConfigFromFlags("", "C:/Users/m00483107/.kube/config") + if err != nil { + panic(err) + } + kubeClient := kubernetes.NewForConfigOrDie(config) + nodes, err := kubeClient.CoreV1().Nodes().List(metav1.ListOptions{}) + if err != nil { + panic(err) + } + pods, err := kubeClient.CoreV1().Pods("").List(metav1.ListOptions{}) + if err != nil { + panic(err) + } + + fmt.Print("\n--------------------------------------------------------------------------------------------------------------\n") + fmt.Printf(" %-13s |", " ") + for _, n := range nodes.Items { + fmt.Printf(" %-20s |", n.Name) + } + fmt.Print("\n--------------------------------------------------------------------------------------------------------------\n") + fmt.Printf(" %-13s |", "Alloctable") + for _, n := range nodes.Items { + res := fmt.Sprintf("cpu: %s", n.Status.Allocatable.Cpu()) + fmt.Printf(" %-20s |", res) + } + fmt.Println() + fmt.Printf(" %-13s |", " ") + for _, n := range nodes.Items { + res := fmt.Sprintf("mem: %s", n.Status.Allocatable.Memory()) + fmt.Printf(" %-20s |", res) + } + fmt.Print("\n--------------------------------------------------------------------------------------------------------------\n") + + podMap := map[string]*api.Resource{} + + for _, p := range pods.Items { + nodeName := p.Spec.NodeName + // Only account running pods here. 
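+		// Note: besides Succeeded/Failed pods, only pods not yet bound to a node
+		// are skipped; Pending pods that are already bound still count against
+		// the node's idle resources computed below.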
+ if p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed { + continue + } + if len(nodeName) == 0 { + continue + } + if _, found := podMap[nodeName]; !found { + podMap[nodeName] = api.EmptyResource() + } + res := api.GetPodResourceWithoutInitContainers(&p) + podMap[nodeName].Add(res) + } + + fmt.Printf(" %-13s |", "Idle") + for _, n := range nodes.Items { + allocate := n.Status.Allocatable.DeepCopy() + c := allocate.Cpu() + if r, found := podMap[n.Name]; found { + cpu := c.MilliValue() - int64(r.MilliCPU) + c.SetMilli(cpu) + } + + res := fmt.Sprintf("cpu: %s", c) + fmt.Printf(" %-20s |", res) + } + fmt.Println() + fmt.Printf(" %-13s |", " ") + for _, n := range nodes.Items { + allocate := n.Status.Allocatable.DeepCopy() + c := allocate.Memory() + if r, found := podMap[n.Name]; found { + cpu := c.Value() - int64(r.Memory) + c.Set(cpu) + } + + res := fmt.Sprintf("mem: %s", c) + fmt.Printf(" %-20s |", res) + } + fmt.Print("\n--------------------------------------------------------------------------------------------------------------\n") + +} diff --git a/docs/samples/kubecon-2019-china/scripts/watch.sh b/docs/samples/kubecon-2019-china/scripts/watch.sh new file mode 100644 index 0000000000..4e22321e00 --- /dev/null +++ b/docs/samples/kubecon-2019-china/scripts/watch.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +watch_cluster() { + echo "Nodes:" + node-info.exe + + echo "" + echo "" + + echo "Pods:" + echo "-------------------------------" + kubectl get pods + + echo "" + echo "" + + echo "Volcano Jobs:" + echo "-------------------------------" + vkctl job list +} + +if [ $# == 0 ]; then + watch_cluster +else + + while getopts "f" arg + do + case $arg in + "f") + while [ 1 ] + do + clear + watch_cluster + sleep 3 + done + ;; + ?) + echo "Unknown arguments" + esac + done +fi \ No newline at end of file diff --git a/docs/samples/kubecon-2019-china/spark-sample/remove-all.sh b/docs/samples/kubecon-2019-china/spark-sample/remove-all.sh new file mode 100644 index 0000000000..6fb056bf27 --- /dev/null +++ b/docs/samples/kubecon-2019-china/spark-sample/remove-all.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +kubectl delete podgroups --all +kubectl delete sparkapp --all diff --git a/docs/samples/kubecon-2019-china/spark-sample/spark-pi.yaml b/docs/samples/kubecon-2019-china/spark-sample/spark-pi.yaml new file mode 100644 index 0000000000..19c66e49e3 --- /dev/null +++ b/docs/samples/kubecon-2019-china/spark-sample/spark-pi.yaml @@ -0,0 +1,69 @@ +# +# Copyright 2017 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: "sparkoperator.k8s.io/v1beta1" +kind: SparkApplication +metadata: + name: spark-pi + namespace: default +spec: + type: Scala + mode: cluster + image: "swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/spark:v2.4.0" + imagePullPolicy: Always + mainClass: org.apache.spark.examples.SparkPi + mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.0.jar" + sparkVersion: "2.4.0" + imagePullSecrets: + - default-secret + restartPolicy: + type: Never + volumes: + - name: "test-volume" + hostPath: + path: "/tmp" + type: Directory + driver: + schedulerName: volcano + cores: 0.1 + coreLimit: "200m" + memory: "512m" + labels: + version: 2.4.0 + serviceAccount: spark-operator-sparkoperator + volumeMounts: + - name: "test-volume" + mountPath: "/tmp" + executor: + annotations: + scheduling.k8s.io/group-name: spark-pi-group + schedulerName: volcano + cores: 1 + instances: 15 + memory: "512m" + labels: + version: 2.4.0 + volumeMounts: + - name: "test-volume" + mountPath: "/tmp" +--- +apiVersion: scheduling.incubator.k8s.io/v1alpha1 +kind: PodGroup +metadata: + name: spark-pi-group +spec: + minMember: 3 + + diff --git a/docs/samples/kubecon-2019-china/task-priority/elastic-job.yaml b/docs/samples/kubecon-2019-china/task-priority/elastic-job.yaml new file mode 100644 index 0000000000..0a9d19b519 --- /dev/null +++ b/docs/samples/kubecon-2019-china/task-priority/elastic-job.yaml @@ -0,0 +1,38 @@ +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job +metadata: + name: elastic-job + labels: + # 根据业务需要设置作业类型 + "volcano.sh/job-type": "ElasticJob" +spec: + minAvailable: 3 + schedulerName: volcano + tasks: + - replicas: 9 + name: worker + template: + spec: + priorityClass: normal-pri + containers: + - image: nginx + name: workers + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" + - replicas: 1 + name: master + template: + spec: + priorityClass: high-pri + containers: + - image: nginx + name: master + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" + diff --git a/docs/samples/kubecon-2019-china/task-priority/high-priority.yaml b/docs/samples/kubecon-2019-china/task-priority/high-priority.yaml new file mode 100644 index 0000000000..9b9d82282d --- /dev/null +++ b/docs/samples/kubecon-2019-china/task-priority/high-priority.yaml @@ -0,0 +1,5 @@ +apiVersion: scheduling.k8s.io/v1beta1 +kind: PriorityClass +metadata: + name: high-pri +value: 1000 diff --git a/docs/samples/kubecon-2019-china/task-priority/nginx.yaml b/docs/samples/kubecon-2019-china/task-priority/nginx.yaml new file mode 100644 index 0000000000..49c9fdb23a --- /dev/null +++ b/docs/samples/kubecon-2019-china/task-priority/nginx.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: nginx + labels: + app: nginx +spec: + # modify replicas according to your case + replicas: 4 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx + resources: + requests: + cpu: "1000m" + limits: + cpu: "1000m" diff --git a/docs/samples/kubecon-2019-china/task-priority/normal-priority.yaml b/docs/samples/kubecon-2019-china/task-priority/normal-priority.yaml new file mode 100644 index 0000000000..d581d5c168 --- /dev/null +++ b/docs/samples/kubecon-2019-china/task-priority/normal-priority.yaml @@ -0,0 +1,5 @@ +apiVersion: scheduling.k8s.io/v1beta1 +kind: PriorityClass +metadata: + name: high-pri +value: 10 diff --git a/docs/samples/kubecon-2019-china/tf-sample/tf-example.yaml 
b/docs/samples/kubecon-2019-china/tf-sample/tf-example.yaml new file mode 100644 index 0000000000..5c792a7a13 --- /dev/null +++ b/docs/samples/kubecon-2019-china/tf-sample/tf-example.yaml @@ -0,0 +1,107 @@ +################################################ +# # +# Demo for running TF tasks on Volcano # +# # +################################################ +# +# This yaml used to demonstrate how to running a TF task via Volcano Job, +# the running sample program is from TF benchmark +# (https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks) +# The equivalent command when running locally: +# +# python tf_cnn_benchmarks.py --num_gpus=1 --batch_size=32 --model=resnet50 --variable_update=parameter_server +# --local_parameter_device=cpu --device=cpu --data_format=NHWC +# +# The output from ps or worker pod can be used to identify whether the TF cluster +# has been correctly configured: +# +# (log from worker pod....) +# 2019-04-23 11:10:25.554248: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] +# Initialize GrpcChannelCache for job ps -> {0 -> tensorflow-benchmark-ps-0.tensorflow-benchmark:2222} +# 2019-04-23 11:10:25.554308: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] +# Initialize GrpcChannelCache for job worker -> {0 -> localhost:2222} +# +# (log from ps pod....) +# 2019-04-23 11:10:25.552827: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] +# Initialize GrpcChannelCache for job ps -> {0 -> localhost:2222} +# 2019-04-23 11:10:25.552861: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] +# Initialize GrpcChannelCache for job worker -> {0 -> tensorflow-benchmark-worker-0.tensorflow-benchmark:2222} +# +# **NOTES**: This example may take about an hour to finish. When running multiple jobs, please ensure enough resource +# is guaranteed for each of the worker pods. 
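+#
+# The `env`/`svc` plugins declared in the job spec below supply the
+# /etc/volcano/ps.host and /etc/volcano/worker.host files and the VK_TASK_INDEX
+# variable referenced in the container commands.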
+ +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job +metadata: + name: tensorflow-benchmark + labels: + "volcano.sh/job-type": "Tensorflow" +spec: + minAvailable: 3 + schedulerName: volcano + plugins: + env: [] + svc: [] + policies: + - event: PodEvicted + action: RestartJob + tasks: + - replicas: 1 + name: ps + template: + spec: + imagePullSecrets: + - name: default-secret + containers: + - command: + - sh + - -c + - | + PS_HOST=`cat /etc/volcano/ps.host | sed 's/$/&:2222/g' | tr "\n" ","`; + WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&:2222/g' | tr "\n" ","`; + python tf_cnn_benchmarks.py --batch_size=32 --model=resnet50 --variable_update=parameter_server --flush_stdout=true --num_gpus=1 --local_parameter_device=cpu --device=cpu --data_format=NHWC --job_name=ps --task_index=${VK_TASK_INDEX} --ps_hosts=${PS_HOST} --worker_hosts=${WORKER_HOST} + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-tf:0.0.1 + name: tensorflow + ports: + - containerPort: 2222 + name: tfjob-port + resources: + requests: + cpu: "1000m" + memory: "2048Mi" + limits: + cpu: "1000m" + memory: "2048Mi" + workingDir: /opt/tf-benchmarks/scripts/tf_cnn_benchmarks + restartPolicy: OnFailure + - replicas: 2 + name: worker + policies: + - event: TaskCompleted + action: CompleteJob + template: + spec: + imagePullSecrets: + - name: default-secret + containers: + - command: + - sh + - -c + - | + PS_HOST=`cat /etc/volcano/ps.host | sed 's/$/&:2222/g' | tr "\n" ","`; + WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&:2222/g' | tr "\n" ","`; + python tf_cnn_benchmarks.py --batch_size=32 --model=resnet50 --variable_update=parameter_server --flush_stdout=true --num_gpus=1 --local_parameter_device=cpu --device=cpu --data_format=NHWC --job_name=worker --task_index=${VK_TASK_INDEX} --ps_hosts=${PS_HOST} --worker_hosts=${WORKER_HOST} + image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-tf:0.0.1 + name: tensorflow + ports: + - containerPort: 2222 + name: tfjob-port + resources: + requests: + cpu: "2000m" + memory: "2048Mi" + limits: + cpu: "2000m" + memory: "4096Mi" + workingDir: /opt/tf-benchmarks/scripts/tf_cnn_benchmarks + restartPolicy: OnFailure From 1e5f4325aebe4f4f64845f9c60230e9a5d8dd5af Mon Sep 17 00:00:00 2001 From: wangyuqing4 Date: Mon, 3 Jun 2019 19:35:29 +0800 Subject: [PATCH 23/26] fix initialize job status --- pkg/apis/batch/v1alpha1/job.go | 2 ++ pkg/controllers/job/job_controller_actions.go | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pkg/apis/batch/v1alpha1/job.go b/pkg/apis/batch/v1alpha1/job.go index 7ddf67aac0..4f8a566a8a 100644 --- a/pkg/apis/batch/v1alpha1/job.go +++ b/pkg/apis/batch/v1alpha1/job.go @@ -114,6 +114,8 @@ const ( PodGroupError JobEvent = "PodGroupError" //ExecuteAction action issued event for each action ExecuteAction JobEvent = "ExecuteAction" + //JobStatusError is generated if update job status failed + JobStatusError JobEvent = "JobStatusError" ) // Event represent the phase of Job, e.g. pod-failed. 
diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index 773456eb0d..cc54ef2fa2 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -154,6 +154,12 @@ func (cc *Controller) createJob(jobInfo *apis.JobInfo, updateStatus state.Update job := jobInfo.Job.DeepCopy() glog.Infof("Current Version is: %d of job: %s/%s", job.Status.Version, job.Namespace, job.Name) + if err := cc.initJobStatus(job); err != nil { + cc.recorder.Event(job, v1.EventTypeWarning, string(vkv1.JobStatusError), + fmt.Sprintf("Failed to initialize job status, err: %v", err)) + return err + } + if err := cc.pluginOnJobAdd(job); err != nil { cc.recorder.Event(job, v1.EventTypeWarning, string(vkv1.PluginError), fmt.Sprintf("Execute plugin when job add failed, err: %v", err)) @@ -523,3 +529,24 @@ func (cc *Controller) calcPGMinResources(job *vkv1.Job) *v1.ResourceList { return &minAvailableTasksRes } + +func (cc *Controller) initJobStatus(job *vkv1.Job) error { + if job.Status.State.Phase != "" { + return nil + } + + job.Status.State.Phase = vkv1.Pending + if job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job); err != nil { + glog.Errorf("Failed to update status of Job %v/%v: %v", + job.Namespace, job.Name, err) + return err + } else { + if err := cc.cache.Update(job); err != nil { + glog.Errorf("CreateJob - Failed to update Job %v/%v in cache: %v", + job.Namespace, job.Name, err) + return err + } + } + + return nil +} From a5e70b829cfd01e88853f33ab232671ccf6f0b6c Mon Sep 17 00:00:00 2001 From: wangyuqing4 Date: Wed, 5 Jun 2019 10:55:14 +0800 Subject: [PATCH 24/26] fix Pending job minA --- pkg/controllers/job/job_controller_actions.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index cc54ef2fa2..99a0426a2b 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -536,6 +536,7 @@ func (cc *Controller) initJobStatus(job *vkv1.Job) error { } job.Status.State.Phase = vkv1.Pending + job.Status.MinAvailable = int32(job.Spec.MinAvailable) if job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job); err != nil { glog.Errorf("Failed to update status of Job %v/%v: %v", job.Namespace, job.Name, err) From 6d4a3f710f736d90bc799105c0b0ad7f5c99a111 Mon Sep 17 00:00:00 2001 From: lminzhw Date: Fri, 28 Jun 2019 15:30:52 +0800 Subject: [PATCH 25/26] fix golint --- pkg/controllers/job/job_controller_actions.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/controllers/job/job_controller_actions.go b/pkg/controllers/job/job_controller_actions.go index 99a0426a2b..77ed2047e1 100644 --- a/pkg/controllers/job/job_controller_actions.go +++ b/pkg/controllers/job/job_controller_actions.go @@ -537,16 +537,16 @@ func (cc *Controller) initJobStatus(job *vkv1.Job) error { job.Status.State.Phase = vkv1.Pending job.Status.MinAvailable = int32(job.Spec.MinAvailable) - if job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job); err != nil { + job, err := cc.vkClients.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(job) + if err != nil { glog.Errorf("Failed to update status of Job %v/%v: %v", job.Namespace, job.Name, err) return err - } else { - if err := cc.cache.Update(job); err != nil { - glog.Errorf("CreateJob - Failed to update Job %v/%v in cache: %v", - job.Namespace, job.Name, 
err) - return err - } + } + if err := cc.cache.Update(job); err != nil { + glog.Errorf("CreateJob - Failed to update Job %v/%v in cache: %v", + job.Namespace, job.Name, err) + return err } return nil From 525aa30c32174765acf09530719a616a5d4d0aea Mon Sep 17 00:00:00 2001 From: soolaugust Date: Fri, 28 Jun 2019 16:14:08 +0800 Subject: [PATCH 26/26] fix some words in README doc --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fe38068b4e..04faae9fd3 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Volcano is a batch system built on Kubernetes. It provides a suite of mechanisms Kubernetes that are commonly required by many classes of batch & elastic workload including: 1. machine learning/deep learning, -2. bioinformatics/genomics, and +2. bioinformatics/genomics 3. other "big data" applications. These types of applications typically run on generalized domain @@ -68,7 +68,7 @@ Install Volcano using following command, ``` helm install volcano/volcano --namespace --name -For eg : +e.g : helm install volcano/volcano --namespace volcano-trial --name volcano-trial ``` @@ -112,12 +112,12 @@ Secondly, install helm chart. ``` helm install installer/helm/chart/volcano --namespace --name -For eg : +e.g : helm install installer/helm/chart/volcano --namespace volcano-trial --name volcano-trial ``` -To Verify your installation run the following commands: +To verify your installation run the following commands: ``` #1. Verify the Running Pods