Skip to content

Commit

Permalink
WIP debug migration
Browse files Browse the repository at this point in the history
  • Loading branch information
wongma7 committed Nov 21, 2020
1 parent 53469af commit 8595420
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 145 deletions.
1 change: 1 addition & 0 deletions hack/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ebs-e2e-test/
305 changes: 173 additions & 132 deletions hack/run-e2e-test
Original file line number Diff line number Diff line change
Expand Up @@ -16,151 +16,192 @@

set -euo pipefail

OS_ARCH=$(go env GOOS)-amd64
TEST_ID=$RANDOM
CLUSTER_NAME=test-cluster-$TEST_ID
TEST_DIR=/tmp/ebs-e2e-test
BASE_DIR=$(dirname $0)
REGION=${AWS_REGION-us-west-2}
ZONES=${AWS_AVAILABILITY_ZONES-us-west-2a,us-west-2b,us-west-2c}
FOCUS=${GINKGO_FOCUS-"[ebs-csi-e2e]"}
NODES=${GINKGO_NODES:-4}
K8S_VERSION=${K8S_VERSION-1.18.10}
INSTANCE_TYPE=${INSTANCE_TYPE-c4.large}

source $(dirname "${BASH_SOURCE}")/utils/helm.sh

echo "Testing in region: $REGION and zones: $ZONES"
TEST_ID=${TEST_ID:-$RANDOM}
CLUSTER_NAME=test-cluster-${TEST_ID}

KOPS_DOWNLOAD_URL=https://github.com/kubernetes/kops/releases/download/v1.18.2/kops-$OS_ARCH
KOPS_PATH=$TEST_DIR/kops
KOPS_STATE_FILE=s3://k8s-kops-csi-e2e
BASE=$(realpath "${BASH_SOURCE[0]}")
BASE_DIR=$(dirname "${BASE}")
TEST_DIR=${BASE_DIR}/ebs-e2e-test
BIN_DIR=${TEST_DIR}/bin
SSH_KEY_PATH=${TEST_DIR}/id_rsa

# Download kops if not yet
if [[ ! -e $KOPS_PATH ]]; then
mkdir -p $TEST_DIR
echo "Downloading KOPS from $KOPS_DOWNLOAD_URL to $KOPS_PATH"
curl -L -X GET $KOPS_DOWNLOAD_URL -o $KOPS_PATH
# Cluster placement. `:-` (rather than `-`) is used throughout so that a
# variable exported as an empty string still falls back to its default.
REGION=${AWS_REGION:-us-west-2}
ZONES=${AWS_AVAILABILITY_ZONES:-us-west-2a,us-west-2b,us-west-2c}
INSTANCE_TYPE=${INSTANCE_TYPE:-c4.large}

# Kubernetes and kops versions, and the kops state store location.
K8S_VERSION=${K8S_VERSION:-1.18.10}
KOPS_VERSION=${KOPS_VERSION:-1.18.2}
KOPS_STATE_FILE=${KOPS_STATE_FILE:-s3://k8s-kops-csi-e2e}

# Settings passed to the ginkgo invocation further down in this script.
KUBECONFIG=${KUBECONFIG:-"${HOME}/.kube/config"}
ARTIFACTS=${ARTIFACTS:-"${TEST_DIR}/artifacts"}
GINKGO_FOCUS=${GINKGO_FOCUS:-"\[ebs-csi-migration\]"}
GINKGO_SKIP=${GINKGO_SKIP:-"\[Disruptive\]"}
GINKGO_NODES=${GINKGO_NODES:-4}
# "true" enables the controller-manager metrics check after the test run.
CHECK_MIGRATION=${CHECK_MIGRATION:-"true"}

# "true" removes the driver, the cluster and the test directory at the end.
CLEAN=${CLEAN:-"true"}

# Print a message framed by marker lines so it stands out in long logs.
# Arguments: $1 - the message text
loudecho() {
  printf '%s\n' "###"
  printf '## %s\n' "${1}"
  printf '%s\n' "#"
}

loudecho "Testing in region ${REGION} and zones ${ZONES}"
mkdir -p "${BIN_DIR}"

loudecho "Installing kops ${KOPS_VERSION} to ${BIN_DIR}"
source "${BASE_DIR}"/utils/kops.sh
kops::install "${BIN_DIR}" "${KOPS_VERSION}"
KOPS_BIN=${BIN_DIR}/kops

loudecho "Installing helm to ${BIN_DIR}"
source "${BASE_DIR}"/utils/helm.sh
helm::install "${BIN_DIR}"
HELM_BIN=${BIN_DIR}/helm

loudecho "Installing ginkgo to ${BIN_DIR}"
GINKGO_BIN=${BIN_DIR}/ginkgo
# Skip the build when a ginkgo binary is already present in BIN_DIR.
if [[ ! -e ${GINKGO_BIN} ]]; then
  export GOPATH=${TEST_DIR}
  export GOBIN=${BIN_DIR}
  export GO111MODULE=on
  # Run `go get` from a directory other than the repository checkout.
  pushd /tmp
  # NOTE(review): the "@version" suffix was mangled into "[email protected]" by
  # e-mail obfuscation in the page this file was captured from; v1.12.0 is
  # presumed to be the intended pin -- confirm against the repository.
  go get github.com/onsi/ginkgo/ginkgo@v1.12.0
  popd
fi

chmod +x $KOPS_PATH

helm::install

echo "Build and push test driver image"
eval $(aws ecr get-login --region $REGION --no-include-email)
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
IMAGE_TAG=$TEST_ID
IMAGE_NAME=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/aws-ebs-csi-driver
docker build -t $IMAGE_NAME:$IMAGE_TAG .
docker push $IMAGE_NAME:$IMAGE_TAG

IMAGE_NAME=${AWS_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/aws-ebs-csi-driver
IMAGE_TAG=${TEST_ID}
set +e
echo "Creating cluster $CLUSTER_NAME"
CLUSTER_YAML_PATH=$TEST_DIR/$CLUSTER_NAME.yaml
SSH_KEY_PATH=$TEST_DIR/id_rsa
ssh-keygen -P csi-e2e -f $SSH_KEY_PATH
if docker images | grep "${IMAGE_NAME}" | grep "${IMAGE_TAG}"; then
set -e
loudecho "Assuming ${IMAGE_NAME}:${IMAGE_TAG} has been built and pushed"
else
set -e
loudecho "Building and pushing test driver image to ${IMAGE_NAME}:${IMAGE_TAG}"
eval "$(aws ecr get-login --region "${REGION}" --no-include-email)"
docker build -t "${IMAGE_NAME}":"${IMAGE_TAG}" .
docker push "${IMAGE_NAME}":"${IMAGE_TAG}"
fi

$KOPS_PATH create cluster --state $KOPS_STATE_FILE \
--zones $ZONES \
loudecho "Generating SSH key $SSH_KEY_PATH"
if [[ ! -e ${SSH_KEY_PATH} ]]; then
ssh-keygen -P csi-e2e -f "${SSH_KEY_PATH}"
fi

set +e
if ${KOPS_BIN} get cluster --state "${KOPS_STATE_FILE}" "${CLUSTER_NAME}".k8s.local; then
set -e
loudecho "Updating cluster $CLUSTER_NAME"
else
set -e
loudecho "Creating cluster $CLUSTER_NAME"
${KOPS_BIN} create cluster --state "${KOPS_STATE_FILE}" \
--zones "${ZONES}" \
--node-count=3 \
--node-size=$INSTANCE_TYPE \
--kubernetes-version=$K8S_VERSION \
--ssh-public-key=$SSH_KEY_PATH.pub \
$CLUSTER_NAME.k8s.local
$KOPS_PATH get cluster --state $KOPS_STATE_FILE $CLUSTER_NAME.k8s.local -o yaml > $CLUSTER_YAML_PATH
cat $BASE_DIR/feature-gates.yaml >> $CLUSTER_YAML_PATH
cat $BASE_DIR/additional-policies.yaml >> $CLUSTER_YAML_PATH
$KOPS_PATH replace --state $KOPS_STATE_FILE -f $CLUSTER_YAML_PATH
$KOPS_PATH update cluster --state $KOPS_STATE_FILE $CLUSTER_NAME.k8s.local --yes

# Wait for cluster creation
while [[ 1 ]]; do
$KOPS_PATH validate cluster --state $KOPS_STATE_FILE
ret=$?
if [[ $ret -eq 0 ]]; then
break
else
echo "Waiting cluster to be created"
sleep 30
fi
done;

echo "Deploying driver"
helm::init

helm install --name aws-ebs-csi-driver \
--set enableVolumeScheduling=true \
--set enableVolumeResizing=true \
--set enableVolumeSnapshot=true \
--set image.repository=$IMAGE_NAME \
--set image.tag=$IMAGE_TAG \
./aws-ebs-csi-driver

# Run the test
if [[ "$GINKGO_FOCUS" == "\[ebs-csi-migration\]" ]]; then
# TODO known test failures to skip temporarily
# - should not allow expansion of pvcs without AllowVolumeExpansion property
# - Test passes but cleanup fails, need https://github.com/kubernetes/kubernetes/pull/81107
# - (block volmode) Verify if offline PVC expansion works / should resize volume when PVC is edited while pod is using it
# - NodeExpand for BlockVolumes not well-defined, need more investigation and possibly https://github.com/container-storage-interface/spec/issues/380
# - should provision storage with mount options
# - Known bug, need https://github.com/kubernetes/kubernetes/pull/80191 but not yet in a patch release
pushd ./tests/e2e-migration
go get github.com/onsi/ginkgo/ginkgo
SKIP="\[Disruptive\]\
|should.provision.storage.with.mount.options\
|should.not.mount./.map.unused.volumes.in.a.pod"
$GOBIN/ginkgo -p -nodes=$NODES -v --focus="$FOCUS" --skip="$SKIP" --noColor ./... -- -kubeconfig=$HOME/.kube/config -report-dir=$ARTIFACTS -gce-zone=${ZONES%,*}
TEST_PASS=$?
popd

# There should have been no calls to the in-tree driver kubernetes.io/aws-ebs but many calls to ebs.csi.aws.com
# Find the controller-manager log and read its metrics to verify
NODE=$(kubectl get node -l kubernetes.io/role=master -o json | jq -r ".items[].metadata.name")
kubectl port-forward kube-controller-manager-$NODE 10252:10252 -n kube-system&

# Ensure port forwarding is succeeded
while true
do
HEALTHZ=$(curl -s 127.0.0.1:10252/healthz)
if [[ $HEALTHZ == "ok" ]]; then
echo "Port forwarding is succeeded"
break
else
echo "Port forwarding is not yet ready"
fi
sleep 1
done

curl 127.0.0.1:10252/metrics -s | grep -a 'volume_operation_total_seconds_bucket{operation_name="provision",plugin_name="ebs.csi.aws.com"'
CSI_CALLED=$?
curl 127.0.0.1:10252/metrics -s | grep -a 'volume_operation_total_seconds_bucket{operation_name="provision",plugin_name="kubernetes.io/aws-ebs"'
INTREE_CALLED=$?

echo "TEST_PASS: $TEST_PASS CSI_CALLED: $CSI_CALLED INTREE_CALLED: $INTREE_CALLED"

# TEST_PASS if tests passed, CSI was called, and In-tree was not called
if [ "$TEST_PASS" == 0 ] && [ "$CSI_CALLED" == 0 ] && [ "$INTREE_CALLED" == 1 ]; then
TEST_PASS=0
--node-size="${INSTANCE_TYPE}" \
--kubernetes-version="${K8S_VERSION}" \
--ssh-public-key="${SSH_KEY_PATH}".pub \
"${CLUSTER_NAME}".k8s.local
fi

CLUSTER_YAML_PATH=${TEST_DIR}/${CLUSTER_NAME}.yaml
${KOPS_BIN} get cluster --state "${KOPS_STATE_FILE}" "${CLUSTER_NAME}".k8s.local -o yaml > "${CLUSTER_YAML_PATH}"
cat "${BASE_DIR}"/feature-gates.yaml >> "${CLUSTER_YAML_PATH}"
cat "${BASE_DIR}"/additional-policies.yaml >> "${CLUSTER_YAML_PATH}"
${KOPS_BIN} replace --state "${KOPS_STATE_FILE}" -f "${CLUSTER_YAML_PATH}"
${KOPS_BIN} update cluster --state "${KOPS_STATE_FILE}" "${CLUSTER_NAME}".k8s.local --yes

loudecho "Validating cluster $CLUSTER_NAME"
# Capture the status via `||`: with errexit active, a bare failing command
# would abort the script before `$?` could be read, making the check below
# unreachable.
VALID=0
${KOPS_BIN} validate cluster --state "${KOPS_STATE_FILE}" --wait 10m || VALID=$?
if [[ ${VALID} -ne 0 ]]; then
  exit 1
fi

loudecho "Deploying driver"
${HELM_BIN} upgrade --install aws-ebs-csi-driver \
--namespace kube-system \
--set enableVolumeScheduling=true \
--set enableVolumeResizing=true \
--set enableVolumeSnapshot=true \
--set image.repository="${IMAGE_NAME}" \
--set image.tag="${IMAGE_TAG}" \
./aws-ebs-csi-driver

loudecho "Testing focus ${GINKGO_FOCUS}"
set -x
# Suspend errexit for the test run: a failing suite must be recorded in
# TEST_PASSED, not abort the script, so that the migration check and the
# cleanup steps further down still execute.
set +e
${GINKGO_BIN} -p -nodes="${GINKGO_NODES}" -v --focus="${GINKGO_FOCUS}" --skip="${GINKGO_SKIP}" ./tests/e2e-migration/... -- -kubeconfig="${KUBECONFIG}" -report-dir="${ARTIFACTS}" -gce-zone="${ZONES%,*}"
TEST_PASSED=$?
set -e
set +x

if [[ "${CHECK_MIGRATION}" == true ]]; then
loudecho "Checking migration"
# There should have been no calls to the in-tree driver kubernetes.io/aws-ebs but many calls to ebs.csi.aws.com
# Find the controller-manager log and read its metrics to verify
NODE=$(kubectl get node -l kubernetes.io/role=master -o json | jq -r ".items[].metadata.name")
kubectl port-forward kube-controller-manager-"${NODE}" 10252:10252 -n kube-system &

# Ensure port forwarding succeeded
while true; do
set +e
HEALTHZ=$(curl -s 127.0.0.1:10252/healthz)
set -e
if [[ ${HEALTHZ} == "ok" ]]; then
loudecho "Port forwarding succeeded"
break
else
TEST_PASS=1
loudecho "Port forwarding is not yet ready"
fi
else
go get github.com/onsi/ginkgo/ginkgo
export KUBECONFIG=$HOME/.kube/config
$GOBIN/ginkgo -p -nodes=$NODES -v --focus="$FOCUS" tests/e2e -- -report-dir=$ARTIFACTS
TEST_PASS=$?
sleep 1
done

# Look for a provision metric attributed to the CSI driver in the
# controller-manager metrics. PIPESTATUS[1] is grep's exit status (0 when a
# matching line was found) and must be read immediately after the pipeline.
# errexit is suspended so that a non-matching grep does not abort the script.
set +e
curl 127.0.0.1:10252/metrics -s | grep -a 'volume_operation_total_seconds_bucket{operation_name="provision",plugin_name="ebs.csi.aws.com"'
CSI_CALLED=${PIPESTATUS[1]}
set -e

# Same probe for the in-tree plugin; the verdict further down expects grep to
# report 1 here.
set +e
curl 127.0.0.1:10252/metrics -s | grep -a 'volume_operation_total_seconds_bucket{operation_name="provision",plugin_name="kubernetes.io/aws-ebs"'
INTREE_CALLED=${PIPESTATUS[1]}
set -e

for PROC in $(jobs -p); do
kill "${PROC}"
done

loudecho "CSI_CALLED: ${CSI_CALLED}"
loudecho "INTREE_CALLED: ${INTREE_CALLED}"

# TEST_PASSED if tests passed, CSI was called, and In-tree was not called
if [ "${TEST_PASSED}" == 0 ] && [ "${CSI_CALLED}" == 0 ] && [ "${INTREE_CALLED}" == 1 ]; then
TEST_PASSED=0
else
TEST_PASSED=1
fi
fi

echo "Removing driver"
helm del --purge aws-ebs-csi-driver
if [[ "${CLEAN}" == true ]]; then
loudecho "Cleaning"

loudecho "Removing driver"
${HELM_BIN} del aws-ebs-csi-driver

echo "Deleting cluster $CLUSTER_NAME"
$KOPS_PATH delete cluster --name $CLUSTER_NAME.k8s.local --state $KOPS_STATE_FILE --yes
loudecho "Deleting cluster ${CLUSTER_NAME}"
${KOPS_BIN} delete cluster --name "${CLUSTER_NAME}".k8s.local --state "${KOPS_STATE_FILE}" --yes

rm -rf $TEST_DIR
rm -rf "${TEST_DIR}"
else
loudecho "Not cleaning"
fi

if [[ $TEST_PASS -ne 0 ]]; then
exit 1
loudecho "TEST_PASSED: ${TEST_PASSED}"
if [[ $TEST_PASSED -ne 0 ]]; then
loudecho "FAIL!"
exit 1
else
loudecho "SUCCESS!"
fi
22 changes: 9 additions & 13 deletions hack/utils/helm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@

set -uo pipefail

OS_ARCH=$(go env GOOS)-amd64

helm::install() {
declare -r helm_name=helm-v2.16.0-$OS_ARCH.tar.gz
wget https://get.helm.sh/$helm_name
tar xvzf $helm_name
mv $OS_ARCH/helm /usr/local/bin/helm
}

helm::init() {
declare -r rbac_file_path=$(dirname "${BASH_SOURCE}")/tiller-rbac.yaml
kubectl apply -f $rbac_file_path
helm init --service-account tiller --history-max 200 --wait
kubectl get po -n kube-system
INSTALL_PATH=${1}
if [[ ! -e ${INSTALL_PATH}/helm ]]; then
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
chmod 700 get_helm.sh
export USE_SUDO=false
export HELM_INSTALL_DIR=${INSTALL_PATH}
./get_helm.sh
rm get_helm.sh
fi
}
15 changes: 15 additions & 0 deletions hack/utils/kops.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash

set -uo pipefail

OS_ARCH=$(go env GOOS)-amd64

#######################################
# Download the kops release binary into a directory unless one is already
# there.
# Globals:   OS_ARCH (read) - platform suffix of the release asset name
# Arguments: $1 - directory to install into (created if missing)
#            $2 - kops version without the leading "v", e.g. 1.18.2
# Returns:   0 if <dir>/kops exists afterwards, non-zero if the download
#            or installation failed
#######################################
kops::install() {
  local install_path=${1}
  local kops_version=${2}
  local target=${install_path}/kops

  if [[ -e ${target} ]]; then
    return 0
  fi

  local url=https://github.com/kubernetes/kops/releases/download/v${kops_version}/kops-${OS_ARCH}
  local tmp
  mkdir -p "${install_path}" || return 1
  # Download to a temporary name and move it into place only on success, so a
  # failed or partial download never satisfies the existence check above.
  tmp=$(mktemp "${install_path}/kops.XXXXXX") || return 1
  # -f makes curl fail on HTTP errors; otherwise the error page is saved.
  if curl -fL "${url}" -o "${tmp}" \
    && chmod 755 "${tmp}" \
    && mv -f -- "${tmp}" "${target}"; then
    return 0
  fi
  rm -f -- "${tmp}"
  return 1
}

0 comments on commit 8595420

Please sign in to comment.