Refactor remove node to allow removing dead nodes and etcd members (kubernetes-sigs#5009)

Change-Id: I1c59249f08f16d0f6fd60df6ab61f17a0a7df189
mattymo authored and k8s-ci-robot committed Aug 7, 2019
1 parent 7abf6a6 commit a44235d
Showing 3 changed files with 71 additions and 18 deletions.
docs/getting-started.md (23 changes: 15 additions & 8 deletions)
@@ -51,20 +51,27 @@ You may want to add worker, master or etcd nodes to your existing cluster. This
Remove nodes
------------

-You may want to remove **worker** nodes to your existing cluster. This can be done by re-running the `remove-node.yml` playbook. First, all nodes will be drained, then stop some kubernetes services and delete some certificates, and finally execute the kubectl command to delete these nodes. This can be combined with the add node function, This is generally helpful when doing something like autoscaling your clusters. Of course if a node is not working, you can remove the node and install it again.
-
-Add worker nodes to the list under kube-node if you want to delete them (or utilize a [dynamic inventory](https://docs.ansible.com/ansible/intro_dynamic_inventory.html)).
-
-    ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
-    --private-key=~/.ssh/private_key
-
-Use `--extra-vars "node=<nodename>,<nodename2>"` to select the node you want to delete.
+You may want to remove **master**, **worker**, or **etcd** nodes from your
+existing cluster. This can be done by re-running the `remove-node.yml`
+playbook. First, all specified nodes are drained; then some Kubernetes
+services are stopped and some certificates are deleted; finally, kubectl
+is used to delete the nodes. This can be combined with the add-node
+function, which is generally helpful when doing something like
+autoscaling your clusters. Of course, if a node is not working, you can
+remove it and install it again.
+
+Use `--extra-vars "node=<nodename>,<nodename2>"` to select the node(s) you want to delete.
+```
+ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
+  --private-key=~/.ssh/private_key \
+  --extra-vars "node=nodename,nodename2"
+```

+If a node is completely unreachable by SSH, add `--extra-vars reset_nodes=no`
+to skip the node reset step. If one node is unreachable but the other nodes
+you wish to remove can be reached via SSH, you can instead set `reset_nodes=no`
+as a host variable on the unreachable node in your inventory.
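
For example, to remove a node that is powered off or otherwise unreachable over
SSH (a usage sketch; the node name `deadnode` and the inventory path are
placeholders, not part of the diff above):
```
ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
  --private-key=~/.ssh/private_key \
  --extra-vars "node=deadnode reset_nodes=no"
```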

Connecting to Kubernetes
------------------------

remove-node.yml (15 changes: 8 additions & 7 deletions)
@@ -1,6 +1,7 @@
---
- hosts: localhost
  become: no
+  gather_facts: no
  tasks:
    - name: "Check ansible version >=2.7.8"
      assert:
@@ -12,12 +13,8 @@
  vars:
    ansible_connection: local

-- hosts: all
-  vars:
-    ansible_ssh_pipelining: true
-  gather_facts: true

+- hosts: "{{ node | default('etcd:k8s-cluster:calico-rr') }}"
+  gather_facts: no
  vars_prompt:
    name: "delete_nodes_confirmation"
    prompt: "Are you sure you want to delete nodes state? Type 'yes' to delete nodes."
@@ -31,16 +28,20 @@
      when: delete_nodes_confirmation != "yes"

- hosts: kube-master
+  gather_facts: no
  roles:
    - { role: kubespray-defaults }
    - { role: remove-node/pre-remove, tags: pre-remove }

- hosts: "{{ node | default('kube-node') }}"
+  gather_facts: no
  roles:
    - { role: kubespray-defaults }
-    - { role: reset, tags: reset }
+    - { role: reset, tags: reset, when: reset_nodes|default(True) }

-- hosts: kube-master
+# Currently cannot remove first master or etcd
+- hosts: "{{ node | default('kube-master[1:]:etcd[:1]') }}"
+  gather_facts: no
  roles:
    - { role: kubespray-defaults }
    - { role: remove-node/post-remove, tags: post-remove }
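
A note on the host patterns above: Ansible's `:` unions groups, so the default
`etcd:k8s-cluster:calico-rr` targets all three groups, and a slice such as
`kube-master[1:]` matches every master except the first (hence the comment that
the first master cannot be removed this way). Since Ansible skips `vars_prompt`
for variables that are already defined, the confirmation prompt can also be
bypassed for unattended runs; a hypothetical invocation (inventory path and
node name are placeholders):
```
ansible-playbook -i inventory/mycluster/hosts.yml remove-node.yml -b -v \
  --extra-vars "node=node5 delete_nodes_confirmation=yes"
```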
roles/remove-node/post-remove/tasks/main.yml (51 changes: 48 additions & 3 deletions)
@@ -1,9 +1,54 @@
---
+- name: Lookup node IP in kubernetes
+  shell: >-
+    {{ bin_dir }}/kubectl get nodes {{ node }}
+    -o jsonpath='{range.status.addresses[?(@.type=="InternalIP")]}{.address}{"\n"}{end}'
+  register: remove_node_ip
+  when:
+    - inventory_hostname in groups['etcd']
+    - ip is not defined
+    - access_ip is not defined
+  delegate_to: "{{ groups['etcd']|first }}"
+  failed_when: false
+
+- name: Set node IP
+  set_fact:
+    node_ip: "{{ ip | default(access_ip | default(remove_node_ip.stdout)) | trim }}"
+
- name: Delete node
-  command: "{{ bin_dir }}/kubectl delete node {{ item }}"
-  with_items:
-    - "{{ node.split(',') | default(groups['kube-node']) }}"
+  command: "{{ bin_dir }}/kubectl delete node {{ inventory_hostname }}"
  delegate_to: "{{ groups['kube-master']|first }}"
  run_once: true
  ignore_errors: yes

+- name: Lookup etcd member id
+  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep {{ node_ip }} | cut -d: -f1"
+  register: etcd_member_id
+  ignore_errors: true
+  changed_when: false
+  check_mode: no
+  tags:
+    - facts
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem"
+    ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"
+  delegate_to: "{{ groups['etcd']|first }}"
+  when: inventory_hostname in groups['etcd']

+- name: Remove etcd member from cluster
+  shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member remove {{ etcd_member_id.stdout }}"
+  register: etcd_member_in_cluster
+  ignore_errors: true
+  changed_when: false
+  check_mode: no
+  tags:
+    - facts
+  environment:
+    ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}.pem"
+    ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ groups['etcd']|first }}-key.pem"
+    ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"
+  delegate_to: "{{ groups['etcd']|first }}"
+  when:
+    - inventory_hostname in groups['etcd']
+    - etcd_member_id.stdout
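
For reference, what the lookup and removal tasks automate can be approximated
by hand with the same etcdctl v2 commands used above. A rough sketch, assuming
the `ETCDCTL_*` certificate variables are already exported, with placeholder
values for the endpoint, node name, and node IP:
```
# Find the node's InternalIP as registered in Kubernetes
kubectl get nodes node3 \
  -o jsonpath='{range.status.addresses[?(@.type=="InternalIP")]}{.address}{"\n"}{end}'

# Look up the etcd member id for that IP, then remove the member
etcdctl --no-sync --endpoints=https://10.0.0.1:2379 member list | grep 10.0.0.3 | cut -d: -f1
etcdctl --no-sync --endpoints=https://10.0.0.1:2379 member remove <member_id>
```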
