Skip to content
This repository has been archived by the owner on Oct 24, 2023. It is now read-only.

Commit

Permalink
reuse clean up logic
Browse files Browse the repository at this point in the history
  • Loading branch information
jsturtevant committed Nov 4, 2020
1 parent 560c39e commit ce8d812
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 87 deletions.
68 changes: 3 additions & 65 deletions staging/provisioning/windows/kubeletstart.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -184,75 +184,13 @@ Update-CNIConfigKubenetContainerD($podCIDR, $masterSubnetGW) {
Add-Content -Path $global:CNIConfig -Value (ConvertTo-Json $configJson -Depth 20)
}

# Removes the HNS network called $networkname together with the state that
# depends on it. If no network with that name exists the function does nothing.
#
# Order of operations when the network is found:
#   1. remove all containers (containerd or docker, depending on $UseContainerD)
#   2. stop the kubeproxy and kubelet services
#   3. delete every persisted HNS policy list
#   4. remove the HNS network itself, then pause for 10 seconds
function CleanUpNetwork($networkname) {
    $existingNetwork = Get-HnsNetwork | Where-Object Name -EQ $networkname
    if (-not $existingNetwork) {
        # No matching network: leave containers and services untouched.
        return
    }

    Write-Log "Cleaning up containers"
    if ($UseContainerD -eq $true) {
        # Kill the running tasks first, then delete the container records.
        ctr.exe -n k8s.io c ls -q | ForEach-Object { ctr -n k8s.io tasks kill $_ }
        ctr.exe -n k8s.io c ls -q | ForEach-Object { ctr -n k8s.io c rm $_ }
    }
    else {
        docker.exe ps -q | ForEach-Object { docker rm $_ -f }
    }

    # Services are stopped before the policy lists and the network are removed.
    Write-Log "Stopping kubeproxy service"
    Stop-Service kubeproxy

    Write-Log "Stopping kubelet service"
    Stop-Service kubelet

    # Policy lists must be deleted before the network.
    #
    # Originally a workaround for
    #   https://github.com/kubernetes/kubernetes/pull/68923 (< 1.14) and
    #   https://github.com/kubernetes/kubernetes/pull/78612 (<= 1.15).
    #
    # The October patch 10.0.17763.1554 introduced a breaking change: if the
    # network gets into a bad state, the HNS policy list has to be removed
    # before the network can be. Details:
    #   https://github.com/Azure/aks-engine/pull/3956#issuecomment-720797433
    # Kubeproxy does not fail in that situation because the errors are not handled:
    #   https://github.com/delulu/kubernetes/blob/524de768bb64b7adff76792ca3bf0f0ece1e849f/pkg/proxy/winkernel/proxier.go#L532
    Write-Log "Cleaning up persisted HNS policy lists"
    Get-HnsPolicyList | Remove-HnsPolicyList

    Write-Host "Cleaning up old HNS network found"
    Remove-HnsNetwork $existingNetwork
    Start-Sleep 10
}


if ($global:NetworkPlugin -eq "azure") {
Write-Host "NetworkPlugin azure, starting kubelet."

Write-Host "Cleaning stale CNI data"
# Kill all cni instances & stale data left by cni
# Cleanup all files related to cni
taskkill /IM azure-vnet.exe /f
taskkill /IM azure-vnet-ipam.exe /f

$filesToRemove = @(
"c:\k\azure-vnet.json",
"c:\k\azure-vnet.json.lock",
"c:\k\azure-vnet-ipam.json",
"c:\k\azure-vnet-ipam.json.lock"
"c:\k\azure-vnet-ipamv6.json",
"c:\k\azure-vnet-ipamv6.json.lock"
)

foreach ($file in $filesToRemove) {
if (Test-Path $file) {
Write-Log "Deleting stale file at $file"
Remove-Item $file
}
}

# Find if network created by CNI exists, if yes, remove it
# This is required to keep the network non-persistent behavior
# Going forward, this would be done by HNS automatically during restart of the node
CleanUpNetwork("azure")
./windowsnodereset.ps1 -CleanupOnly $true

# Restart Kubeproxy, which would wait, until the network is created
# This was fixed in 1.15, workaround still needed for 1.14 https://github.com/kubernetes/kubernetes/pull/78612
Expand Down Expand Up @@ -291,7 +229,7 @@ if (($global:NetworkPlugin -eq "kubenet") -and ($global:ContainerRuntime -eq "do
$process | Stop-Process | Out-Null
}

CleanUpNetwork($global:NetworkMode.ToLower())
./windowsnodereset.ps1 -CleanupOnly $true

Write-Host "Creating a new hns Network"
$hnsNetwork = New-HNSNetwork -Type $global:NetworkMode -AddressPrefix $podCIDR -Gateway $masterSubnetGW -Name $global:NetworkMode.ToLower() -Verbose
Expand Down Expand Up @@ -338,7 +276,7 @@ if (($global:NetworkPlugin -eq "kubenet") -and ($global:ContainerRuntime -eq "co
$process | Stop-Process | Out-Null
}

CleanUpNetwork($global:NetworkMode.ToLower())
./windowsnodereset.ps1 -CleanupOnly $true

Write-Host "Creating a new hns Network"
$hnsNetwork = New-HNSNetwork -Type $global:NetworkMode -AddressPrefix $podCIDR -Gateway $masterSubnetGW -Name $global:NetworkMode.ToLower() -Verbose
Expand Down
67 changes: 45 additions & 22 deletions staging/provisioning/windows/windowsnodereset.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
cleanup actions to help ensure the node comes up cleanly.
#>

# Script parameters.
#   -CleanupOnly: when $true the script performs its cleanup work and then
#                 exits before starting the kubelet service; kubeletstart.ps1
#                 calls it this way (-CleanupOnly $true). Defaults to $false.
# Note: the last entry in a Param() list must not be followed by a comma;
# a trailing comma is a parse error and prevents the script from loading.
Param(
    [parameter(HelpMessage = "CleanupOnly does clean up but does not start the services")]
    [bool] $CleanupOnly = $false
)

$global:LogPath = "c:\k\windowsnodereset.log"
$global:HNSModule = "c:\k\hns.psm1"

Expand Down Expand Up @@ -49,19 +54,22 @@ if ($global:EnableHostsConfigAgent) {
# Perform cleanup
#

Write-Log "Cleaning up persisted HNS policy lists"
# Workaround for https://github.com/kubernetes/kubernetes/pull/68923 in < 1.14,
# and https://github.com/kubernetes/kubernetes/pull/78612 for <= 1.15
#
# October patch 10.0.17763.1554 introduced a breaking change
# which requires the hns policy list to be removed before network if it gets into a bad state
# See https://github.com/Azure/aks-engine/pull/3956#issuecomment-720797433 for more info
# Kubeproxy doesn't fail because errors are not handled:
# https://github.com/delulu/kubernetes/blob/524de768bb64b7adff76792ca3bf0f0ece1e849f/pkg/proxy/winkernel/proxier.go#L532
Write-Log "Cleaning up persisted HNS policy lists"
Get-HnsPolicyList | Remove-HnsPolicyList

$hnsNetwork = Get-HnsNetwork | Where-Object Name -EQ azure
$networkname = $global:NetworkMode.ToLower()
if ($global:NetworkPlugin -eq "azure"){
$networkname = "azure"
}

$hnsNetwork = Get-HnsNetwork | ? Name -EQ $networkname
if ($hnsNetwork) {
# Cleanup all containers
Write-Log "Cleaning up containers"
if ($UseContainerD -eq $true) {
ctr.exe -n k8s.io c ls -q | ForEach-Object { ctr -n k8s.io tasks kill $_ }
Expand All @@ -71,9 +79,28 @@ if ($hnsNetwork) {
docker.exe ps -q | ForEach-Object { docker rm $_ -f }
}

Write-Log "Removing old HNS network 'azure'"
Write-Log "Cleaning up persisted HNS policy lists"
# Workaround for https://github.com/kubernetes/kubernetes/pull/68923 in < 1.14,
# and https://github.com/kubernetes/kubernetes/pull/78612 for <= 1.15
#
# October patch 10.0.17763.1554 introduced a breaking change
# which requires the hns policy list to be removed before network if it gets into a bad state
# See https://github.com/Azure/aks-engine/pull/3956#issuecomment-720797433 for more info
# Kubeproxy doesn't fail because errors are not handled:
# https://github.com/delulu/kubernetes/blob/524de768bb64b7adff76792ca3bf0f0ece1e849f/pkg/proxy/winkernel/proxier.go#L532
Get-HnsPolicyList | Remove-HnsPolicyList

Write-Host "Cleaning up old HNS network found"
Remove-HnsNetwork $hnsNetwork
Start-Sleep 10
}


if ($global:NetworkPlugin -eq "azure"){
Write-Host "Cleaning stale CNI data"

# Kill all cni instances & stale data left by cni
# Cleanup all files related to cni
taskkill /IM azure-vnet.exe /f
taskkill /IM azure-vnet-ipam.exe /f

Expand All @@ -94,22 +121,6 @@ if ($hnsNetwork) {
}
}



#
# Create required networks
#

# If using kubenet create the HNS network here.
# (The kubelet creates the HNS network when using azure-cni + azure cloud provider)
if ($global:NetworkPlugin -eq 'kubenet') {
Write-Log "Creating new hns network: $($global:NetworkMode.ToLower())"
$podCIDR = Get-PodCIDR
$masterSubnetGW = Get-DefaultGateway $global:MasterSubnet
New-HNSNetwork -Type $global:NetworkMode -AddressPrefix $podCIDR -Gateway $masterSubnetGW -Name $global:NetworkMode.ToLower() -Verbose
Start-sleep 10
}

#
# Start Services
#
Expand All @@ -119,6 +130,18 @@ if ($global:CsiProxyEnabled) {
Start-Service csi-proxy
}

# Bring the hosts-config-agent service back up when it is enabled.
if ($global:EnableHostsConfigAgent) {
    Write-Log "Starting hosts-config-agent service"
    Start-Service -Name hosts-config-agent
}

# In cleanup-only mode stop here: the services that follow are started
# by kubeletstart, so this script must not start them itself.
if ($CleanupOnly) {
    Write-Log "Clean up only. Exiting windowsnodereset.ps1"
    exit
}

Write-Log "Starting kubelet service"
Start-Service kubelet

Expand Down

0 comments on commit ce8d812

Please sign in to comment.