Skip to content

Commit

Permalink
blocking cse on cluster nodes ready (Azure#2225)
Browse files Browse the repository at this point in the history
* blocking cse on cluster nodes ready

* deal with agent-only clusters

* use kubectl var and ignore stderr

* increase node active check timeout to 30 mins
  • Loading branch information
jackfrancis authored Mar 6, 2018
1 parent bc8c9bd commit 0b3d84a
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 46 deletions.
88 changes: 43 additions & 45 deletions parts/k8s/kubernetesmastercustomscript.sh
Original file line number Diff line number Diff line change
@@ -1,36 +1,6 @@
#!/bin/bash

###########################################################
# START SECRET DATA - ECHO DISABLED
###########################################################

# Following parameters now read from environment variable
# Fields for `azure.json`
# TENANT_ID SUBSCRIPTION_ID RESOURCE_GROUP LOCATION SUBNET
# NETWORK_SECURITY_GROUP VIRTUAL_NETWORK VIRTUAL_NETWORK_RESOURCE_GROUP ROUTE_TABLE PRIMARY_AVAILABILITY_SET
# SERVICE_PRINCIPAL_CLIENT_ID SERVICE_PRINCIPAL_CLIENT_SECRET KUBELET_PRIVATE_KEY TARGET_ENVIRONMENT NETWORK_POLICY
# FQDNSuffix VNET_CNI_PLUGINS_URL CNI_PLUGINS_URL MAX_PODS KUBECONFIG_SERVER

# Default values for backoff configuration
# CLOUDPROVIDER_BACKOFF CLOUDPROVIDER_BACKOFF_RETRIES CLOUDPROVIDER_BACKOFF_EXPONENT CLOUDPROVIDER_BACKOFF_DURATION CLOUDPROVIDER_BACKOFF_JITTER
# Default values for rate limit configuration
# CLOUDPROVIDER_RATELIMIT CLOUDPROVIDER_RATELIMIT_QPS CLOUDPROVIDER_RATELIMIT_BUCKET

# USE_MANAGED_IDENTITY_EXTENSION USE_INSTANCE_METADATA

# Master only secrets
# APISERVER_PRIVATE_KEY CA_CERTIFICATE CA_PRIVATE_KEY MASTER_FQDN KUBECONFIG_CERTIFICATE
# KUBECONFIG_KEY ETCD_SERVER_CERTIFICATE ETCD_SERVER_PRIVATE_KEY ETCD_CLIENT_CERTIFICATE ETCD_CLIENT_PRIVATE_KEY
# ETCD_PEER_CERTIFICATES ETCD_PEER_PRIVATE_KEYS ADMINUSER MASTER_INDEX

set -x
# Record DNS traffic observed while the node is being provisioned.
# Starts tcpdump as a background job (the trailing '&'), so this function
# returns immediately; all tcpdump console output is discarded.
packetCaptureProvision() {
    tcpdump \
        -i eth0 -n -vv \
        -G 600 -W 1 \
        -Z root \
        -w /var/log/azure/dnsdump.pcap \
        udp port 53 \
        >/dev/null 2>&1 &
    # -G 600 with -W 1: per tcpdump(8), rotate the dump file every 600 s and
    # stop once one file has been written.
}

packetCaptureProvision

# Derive the distro name from the ID= line of the /etc/*-release files and
# upper-case it so it can be compared against constants such as UBUNTU_OS_NAME.
# NOTE(review): tr -d 'ID="' deletes every I, D, = and " character rather than
# stripping the literal ID= prefix; this only works while the ID value itself
# contains no upper-case I or D.
OS=$(
    cat /etc/*-release \
        | grep ^ID= \
        | tr -d 'ID="' \
        | awk '{print toupper($0)}'
)
UBUNTU_OS_NAME='UBUNTU'
Expand Down Expand Up @@ -566,35 +536,63 @@ function ensureJournal(){
fi
}

# NOTE(review): this listing appears to be a rendered diff without +/- markers:
# lines removed from the old ensureApiserver() and lines added for the new
# ensureK8s() are interleaved, so the text below is not runnable as shown.
#
# Reading the ensureK8s lines only: the function returns early when
# $REBOOTREQUIRED evaluates true, then polls once per second through four
# stages:
#   1. up to 600 tries for the file at $KUBECTL to exist (no exit on timeout)
#   2. up to 600 tries for `$KUBECTL cluster-info` to succeed
#   3. up to 1800 tries for the number of `get nodes` lines matching 'Ready'
#      to equal $TOTAL_NODES
#   4. up to 600 tries for no `get nodes` line to match 'NotReady'
# Stages 2-4 call `exit 3` when their loop finishes without success.
function ensureApiserver() {
function ensureK8s() {
if $REBOOTREQUIRED; then
return
fi
# Status flags: 1 means "not yet achieved", 0 means "achieved".
kubernetesStarted=1
k8sHealthy=1
nodesActive=1
nodesReady=1
# Stage 1: wait for the kubectl binary to be present on disk.
for i in {1..600}; do
if [ -e $KUBECTL ]
then
$KUBECTL cluster-info
break
fi
sleep 1
done
# Stage 2: wait for cluster-info to succeed; kubectl's stderr is discarded.
for i in {1..600}; do
$KUBECTL 2>/dev/null cluster-info
if [ "$?" = "0" ]
then
echo "kubernetes started, took $i seconds"
kubernetesStarted=0
echo "k8s cluster is healthy, took $i seconds"
k8sHealthy=0
break
fi
else
/usr/bin/docker ps | grep apiserver
if [ "$?" = "0" ]
sleep 1
done
if [ $k8sHealthy -ne 0 ]
then
echo "k8s cluster is not healthy after $i seconds"
exit 3
fi
# Stage 3: wait until every expected node is listed. The pattern 'Ready' also
# matches 'NotReady', so this counts nodes in either state.
for i in {1..1800}; do
nodes=$(${KUBECTL} get nodes 2>/dev/null | grep 'Ready' | wc -l)
if [ $nodes -eq $TOTAL_NODES ]
then
echo "kubernetes started, took $i seconds"
kubernetesStarted=0
echo "all nodes are participating, took $i seconds"
nodesActive=0
break
fi
sleep 1
done
if [ $nodesActive -ne 0 ]
then
echo "still waiting for active nodes after $i seconds"
exit 3
fi
# Stage 4: wait until no listed node reports NotReady.
for i in {1..600}; do
notReady=$(${KUBECTL} get nodes 2>/dev/null | grep 'NotReady' | wc -l)
if [ $notReady -eq 0 ]
then
echo "all nodes are Ready, took $i seconds"
nodesReady=0
break
fi
fi
sleep 1
done
if [ $kubernetesStarted -ne 0 ]
if [ $nodesReady -ne 0 ]
then
echo "kubernetes did not start"
echo "still waiting for Ready nodes after $i seconds"
exit 3
fi
}
Expand Down Expand Up @@ -649,7 +647,7 @@ function ensurePodSecurityPolicy(){
fi
POD_SECURITY_POLICY_FILE="/etc/kubernetes/manifests/pod-security-policy.yaml"
if [ -f $POD_SECURITY_POLICY_FILE ]; then
kubectl create -f $POD_SECURITY_POLICY_FILE
$KUBECTL create -f $POD_SECURITY_POLICY_FILE
fi
}

Expand Down Expand Up @@ -742,7 +740,7 @@ if [[ ! -z "${MASTER_NODE}" ]]; then
ensureFilepath $DOCKER
ensureEtcdDataDir
ensureEtcd
ensureApiserver
ensureK8s
ensurePodSecurityPolicy
fi

Expand Down
3 changes: 2 additions & 1 deletion parts/k8s/kubernetesmastervars.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"etcdVersion": "[parameters('etcdVersion')]",
"maxVMsPerPool": 100,
"apiServerCertificate": "[parameters('apiServerCertificate')]",
"totalNodes": "[parameters('totalNodes')]",
{{ if not IsHostedMaster }}
"apiServerPrivateKey": "[parameters('apiServerPrivateKey')]",
"etcdServerCertificate": "[parameters('etcdServerCertificate')]",
Expand Down Expand Up @@ -213,7 +214,7 @@
"provisionScriptParametersCommon": "[concat('TENANT_ID=',variables('tenantID'),' APISERVER_PUBLIC_KEY=',variables('apiserverCertificate'),' SUBSCRIPTION_ID=',variables('subscriptionId'),' RESOURCE_GROUP=',variables('resourceGroup'),' LOCATION=',variables('location'),' SUBNET=',variables('subnetName'),' NETWORK_SECURITY_GROUP=',variables('nsgName'),' VIRTUAL_NETWORK=',variables('virtualNetworkName'),' VIRTUAL_NETWORK_RESOURCE_GROUP=',variables('virtualNetworkResourceGroupName'),' ROUTE_TABLE=',variables('routeTableName'),' PRIMARY_AVAILABILITY_SET=',variables('primaryAvailabilitySetName'),' SERVICE_PRINCIPAL_CLIENT_ID=',variables('servicePrincipalClientId'),' SERVICE_PRINCIPAL_CLIENT_SECRET=',variables('singleQuote'),variables('servicePrincipalClientSecret'),variables('singleQuote'),' KUBELET_PRIVATE_KEY=',variables('clientPrivateKey'),' TARGET_ENVIRONMENT=',variables('targetEnvironment'),' NETWORK_POLICY=',variables('networkPolicy'),' FQDNSuffix=',variables('fqdnEndpointSuffix'),' VNET_CNI_PLUGINS_URL=',variables('vnetCniLinuxPluginsURL'),' CNI_PLUGINS_URL=',variables('cniPluginsURL'),' MAX_PODS=',variables('maxPods'),' CLOUDPROVIDER_BACKOFF=',variables('cloudProviderBackoff'),' CLOUDPROVIDER_BACKOFF_RETRIES=',variables('cloudProviderBackoffRetries'),' CLOUDPROVIDER_BACKOFF_EXPONENT=',variables('cloudProviderBackoffExponent'),' CLOUDPROVIDER_BACKOFF_DURATION=',variables('cloudProviderBackoffDuration'),' CLOUDPROVIDER_BACKOFF_JITTER=',variables('cloudProviderBackoffJitter'),' CLOUDPROVIDER_RATELIMIT=',variables('cloudProviderRatelimit'),' CLOUDPROVIDER_RATELIMIT_QPS=',variables('cloudProviderRatelimitQPS'),' CLOUDPROVIDER_RATELIMIT_BUCKET=',variables('cloudProviderRatelimitBucket'),' USE_MANAGED_IDENTITY_EXTENSION=',variables('useManagedIdentityExtension'),' USE_INSTANCE_METADATA=',variables('useInstanceMetadata'),' CONTAINER_RUNTIME=',variables('containerRuntime'),' KUBECONFIG_SERVER=',variables('kubeconfigServer'))]",

{{if not IsHostedMaster}}
"provisionScriptParametersMaster": "[concat('MASTER_NODE=true APISERVER_PRIVATE_KEY=',variables('apiServerPrivateKey'),' CA_CERTIFICATE=',variables('caCertificate'),' CA_PRIVATE_KEY=',variables('caPrivateKey'),' MASTER_FQDN=',variables('masterFqdnPrefix'),' KUBECONFIG_CERTIFICATE=',variables('kubeConfigCertificate'),' KUBECONFIG_KEY=',variables('kubeConfigPrivateKey'),' ETCD_SERVER_CERTIFICATE=',variables('etcdServerCertificate'),' ETCD_CLIENT_CERTIFICATE=',variables('etcdClientCertificate'),' ETCD_SERVER_PRIVATE_KEY=',variables('etcdServerPrivateKey'),' ETCD_CLIENT_PRIVATE_KEY=',variables('etcdClientPrivateKey'),' ETCD_PEER_CERTIFICATES=',string(variables('etcdPeerCertificates')),' ETCD_PEER_PRIVATE_KEYS=',string(variables('etcdPeerPrivateKeys')),' ADMINUSER=',variables('username'))]",
"provisionScriptParametersMaster": "[concat('MASTER_NODE=true TOTAL_NODES=',variables('totalNodes'),' APISERVER_PRIVATE_KEY=',variables('apiServerPrivateKey'),' CA_CERTIFICATE=',variables('caCertificate'),' CA_PRIVATE_KEY=',variables('caPrivateKey'),' MASTER_FQDN=',variables('masterFqdnPrefix'),' KUBECONFIG_CERTIFICATE=',variables('kubeConfigCertificate'),' KUBECONFIG_KEY=',variables('kubeConfigPrivateKey'),' ETCD_SERVER_CERTIFICATE=',variables('etcdServerCertificate'),' ETCD_CLIENT_CERTIFICATE=',variables('etcdClientCertificate'),' ETCD_SERVER_PRIVATE_KEY=',variables('etcdServerPrivateKey'),' ETCD_CLIENT_PRIVATE_KEY=',variables('etcdClientPrivateKey'),' ETCD_PEER_CERTIFICATES=',string(variables('etcdPeerCertificates')),' ETCD_PEER_PRIVATE_KEYS=',string(variables('etcdPeerPrivateKeys')),' ADMINUSER=',variables('username'))]",
{{end}}
"generateProxyCertsScript": "{{GetKubernetesB64GenerateProxyCerts}}",
"orchestratorNameVersionTag": "{{.OrchestratorProfile.OrchestratorType}}:{{.OrchestratorProfile.OrchestratorVersion}}",
Expand Down
6 changes: 6 additions & 0 deletions parts/k8s/kubernetesparams.t
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
"type": "string"
},
{{else}}
"totalNodes": {
"metadata": {
"description": "Number of nodes (masters + agents) in the cluster"
},
"type": "int"
},
"etcdServerCertificate": {
"metadata": {
"description": "The base 64 server certificate used on the master"
Expand Down
8 changes: 8 additions & 0 deletions pkg/acsengine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,14 @@ func getParameters(cs *api.ContainerService, isClassicMode bool, generatorCode s
addValue(parametersMap, "etcdDownloadURLBase", cloudSpecConfig.KubernetesSpecConfig.EtcdDownloadURLBase)
addValue(parametersMap, "etcdVersion", cs.Properties.OrchestratorProfile.KubernetesConfig.EtcdVersion)
addValue(parametersMap, "etcdDiskSizeGB", cs.Properties.OrchestratorProfile.KubernetesConfig.EtcdDiskSizeGB)
var totalNodes int
if cs.Properties.MasterProfile != nil {
totalNodes = cs.Properties.MasterProfile.Count
}
for _, pool := range cs.Properties.AgentPoolProfiles {
totalNodes = totalNodes + pool.Count
}
addValue(parametersMap, "totalNodes", totalNodes)

if properties.OrchestratorProfile.KubernetesConfig == nil ||
!properties.OrchestratorProfile.KubernetesConfig.UseManagedIdentity {
Expand Down

0 comments on commit 0b3d84a

Please sign in to comment.