diff --git a/CHANGELOG-1.0.md b/CHANGELOG-1.0.md index 0cba2fc41e..ccda43523e 100644 --- a/CHANGELOG-1.0.md +++ b/CHANGELOG-1.0.md @@ -4,6 +4,16 @@ ### Added +- [#2976](https://github.com/epiphany-platform/epiphany/issues/2976) - Allow for custom Terraform scripts +- [#1076](https://github.com/epiphany-platform/epiphany/issues/1076) - Add sorting entries in the inventory file + +### Fixed + +- [#2989](https://github.com/epiphany-platform/epiphany/issues/2989) - Task `Remove swap from /etc/fstab` does not remove swap entry from file +- [#2653](https://github.com/epiphany-platform/epiphany/issues/2653) - Epicli is failing in air-gapped infra mode +- [#3004](https://github.com/epiphany-platform/epiphany/issues/3004) - `disk_size_gb` hardcoded in `/usr/local/epicli/data/azure/defaults/infrastructure/virtual-machine.yml` +- [#2934](https://github.com/epiphany-platform/epiphany/issues/2934) - Duplicated entries in image-registry defaults (LTS) +- [#3065](https://github.com/epiphany-platform/epiphany/issues/3065) - Flag `delete_os_disk_on_termination` has no effect when removing cluster - [#2996](https://github.com/epiphany-platform/epiphany/issues/2996) - Introduce the new configuration field to change a component name ## [1.0.2] 2022-01-26 diff --git a/core/src/epicli/.devcontainer/Dockerfile b/core/src/epicli/.devcontainer/Dockerfile index d5e0011985..090be0348b 100644 --- a/core/src/epicli/.devcontainer/Dockerfile +++ b/core/src/epicli/.devcontainer/Dockerfile @@ -4,13 +4,8 @@ ARG USERNAME=vscode ARG USER_UID=1000 ARG USER_GID=$USER_UID -COPY cert/cert.md cert/*.crt /usr/local/share/ca-certificates/ -COPY cert/cert.md cert/*.pem / -COPY cert/config-pre.sh / - RUN : INSTALL APT REQUIREMENTS \ && export DEBIAN_FRONTEND=noninteractive \ - && /bin/bash /config-pre.sh \ && apt-get -q update \ && apt-get -q install -y --no-install-recommends \ apt-utils dialog \ @@ -54,14 +49,11 @@ RUN : INSTALL PIP REQUIREMENTS \ && pip install --disable-pip-version-check --no-cache-dir 
--default-timeout=100 \ --requirement /requirements.txt \ && pip install --disable-pip-version-check --no-cache-dir --default-timeout=100 \ - poetry pylint pytest setuptools twine wheel - -COPY cert/config-post.sh / + poetry pylint pytest pytest_mock setuptools twine wheel RUN : SETUP USER, CERTS AND OTHERS \ && groupadd --gid $USER_GID $USERNAME \ && useradd -s /bin/bash --uid $USER_UID --gid $USER_GID -m $USERNAME \ && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ && chmod ug=r,o= /etc/sudoers.d/$USERNAME \ - && setcap 'cap_net_bind_service=+ep' /usr/bin/ssh \ - && /bin/bash /config-post.sh + && setcap 'cap_net_bind_service=+ep' /usr/bin/ssh diff --git a/core/src/epicli/.devcontainer/cert/cert.md b/core/src/epicli/.devcontainer/cert/cert.md deleted file mode 100644 index f7c4427a9d..0000000000 --- a/core/src/epicli/.devcontainer/cert/cert.md +++ /dev/null @@ -1,6 +0,0 @@ -# Custom CA certificate/bundle - -Note that for the comments below the filenames of the certificate(s)/bundle do not matter, only the extensions. The certificate(s)/bundle need to be placed here before building the devcontainer. - -1. If you have one CA certificate you can add it here with the ```crt``` extension. -2. If you have multiple certificates in a chain/bundle you need to add them here individually with the ```crt``` extension and also add the single bundle with the ```pem``` extension containing the same certificates. This is needed unfortunally because not all tools inside the container except the single bundle. 
diff --git a/core/src/epicli/.devcontainer/cert/config-post.sh b/core/src/epicli/.devcontainer/cert/config-post.sh deleted file mode 100644 index b8dfc63507..0000000000 --- a/core/src/epicli/.devcontainer/cert/config-post.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -set -o errexit - -if ls /usr/local/share/ca-certificates/*.crt 1> /dev/null 2>&1; then - mkdir "/home/vscode/.aws/" - if ls /*.pem 1> /dev/null 2>&1; then - pattern="/*.pem" - files=( $pattern ) - f="${files[0]}" - echo "Setup AWS ca-bundle $f" - echo -e "[default]\nca_bundle=$f" >> "/home/vscode/.aws/config" - else - pattern="/usr/local/share/ca-certificates/*.crt" - files=( $pattern ) - f="${files[0]}" - echo "Setup AWS cert $f" - echo -e "[default]\nca_bundle=$f" >> "/home/vscode/.aws/config" - fi -else - echo "No cert/ca-bundle to setup" -fi diff --git a/core/src/epicli/.devcontainer/cert/config-pre.sh b/core/src/epicli/.devcontainer/cert/config-pre.sh deleted file mode 100644 index bf0619f973..0000000000 --- a/core/src/epicli/.devcontainer/cert/config-pre.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -set -o errexit - -if ls /usr/local/share/ca-certificates/*.crt 1> /dev/null 2>&1; then - pattern="/usr/local/share/ca-certificates/*.crt" - files=( $pattern ) - for i in "${files[@]}" - do - chmod 644 $i - done - if ls /*.pem 1> /dev/null 2>&1; then - pattern="/*.pem" - files=( $pattern ) - f="${files[0]}" - echo "Setting PIP ca-bundle $f" - chmod 644 $f - pip config set global.cert $f - pip config list - else - f="${files[0]}" - echo "Setting PIP cert $f" - pip config set global.cert $f - pip config list - fi - update-ca-certificates -else - echo "No cert/ca-bundle to setup" -fi diff --git a/core/src/epicli/cli/engine/providers/any/APIProxy.py b/core/src/epicli/cli/engine/providers/any/APIProxy.py index ee44d367e6..fcfbf4904f 100644 --- a/core/src/epicli/cli/engine/providers/any/APIProxy.py +++ b/core/src/epicli/cli/engine/providers/any/APIProxy.py @@ -1,6 +1,6 @@ from 
cli.helpers.doc_list_helpers import select_first from cli.helpers.Log import Log -from cli.models.AnsibleHostModel import AnsibleHostModel +from cli.models.AnsibleHostModel import AnsibleOrderedHostModel class APIProxy: @@ -22,5 +22,8 @@ def get_ips_for_feature(self, component_key): for machine in component_config.machines: machine_doc = select_first(self.config_docs, lambda x: x.kind == 'infrastructure/machine' and x.name == machine) - result.append(AnsibleHostModel(machine_doc.specification.hostname, machine_doc.specification.ip)) + result.append(AnsibleOrderedHostModel(machine_doc.specification.hostname, machine_doc.specification.ip)) + + result.sort() + return result diff --git a/core/src/epicli/cli/engine/providers/azure/APIProxy.py b/core/src/epicli/cli/engine/providers/azure/APIProxy.py index 2ae610a860..8090f65e28 100644 --- a/core/src/epicli/cli/engine/providers/azure/APIProxy.py +++ b/core/src/epicli/cli/engine/providers/azure/APIProxy.py @@ -5,7 +5,7 @@ from cli.helpers.Log import LogPipe, Log from cli.helpers.doc_list_helpers import select_first from cli.helpers.naming_helpers import resource_name, cluster_tag -from cli.models.AnsibleHostModel import AnsibleHostModel +from cli.models.AnsibleHostModel import AnsibleOrderedHostModel class APIProxy: def __init__(self, cluster_model, config_docs): @@ -54,7 +54,7 @@ def get_ips_for_feature(self, component_key): look_for_public_ip = self.cluster_model.specification.cloud.use_public_ips cluster = cluster_tag(self.cluster_prefix, self.cluster_name) running_instances = self.run(self, f'az vm list-ip-addresses --ids $(az resource list --query "[?type==\'Microsoft.Compute/virtualMachines\' && tags.{component_key} == \'\' && tags.cluster == \'{cluster}\'].id" --output tsv)') - result = [] + result: List[AnsibleOrderedHostModel] = [] for instance in running_instances: if isinstance(instance, list): instance = instance[0] @@ -66,7 +66,10 @@ def get_ips_for_feature(self, component_key): ip = 
instance['virtualMachine']['network']['publicIpAddresses'][0]['ipAddress'] else: ip = instance['virtualMachine']['network']['privateIpAddresses'][0] - result.append(AnsibleHostModel(name, ip)) + result.append(AnsibleOrderedHostModel(name, ip)) + + result.sort() + return result def get_storage_account_primary_key(self, storage_account_name): diff --git a/core/src/epicli/cli/engine/schema/DefaultMerger.py b/core/src/epicli/cli/engine/schema/DefaultMerger.py index a12630be5c..623bf0fa06 100644 --- a/core/src/epicli/cli/engine/schema/DefaultMerger.py +++ b/core/src/epicli/cli/engine/schema/DefaultMerger.py @@ -30,7 +30,10 @@ def merge_parent(self, files, doc): merged_parent['version'] = VERSION merge_objdict(merged_parent, doc) return merged_parent + default_config = select_first(self.docs, lambda x: x.name == 'default' and x.kind == doc.kind) default_doc = select_first(files, lambda x: x.name == 'default') + if default_config is not None: + merge_objdict(default_doc, default_config) default_doc['version'] = VERSION merge_objdict(default_doc, doc) - return default_doc \ No newline at end of file + return default_doc diff --git a/core/src/epicli/cli/engine/terraform/TerraformTemplateGenerator.py b/core/src/epicli/cli/engine/terraform/TerraformTemplateGenerator.py index c066231fc3..32f131ea5d 100644 --- a/core/src/epicli/cli/engine/terraform/TerraformTemplateGenerator.py +++ b/core/src/epicli/cli/engine/terraform/TerraformTemplateGenerator.py @@ -12,8 +12,9 @@ def __init__(self, cluster_model, infrastructure): def run(self): terraform_output_dir = get_terraform_path(self.cluster_model.specification.name) - # Remove generated .tf files (not tfstate). - remove_files_matching_glob(terraform_output_dir, '*.tf') + + # Only remove epicli generated .tf files, not tfstate or user created files. 
+ remove_files_matching_glob(terraform_output_dir, '[0-9][0-9][0-9]_*.tf') templates = filter(lambda x: x.kind != 'infrastructure/cloud-init-custom-data', self.infrastructure) for idx, doc in enumerate(templates): diff --git a/core/src/epicli/cli/epicli.py b/core/src/epicli/cli/epicli.py index c389853b85..dd9e025504 100644 --- a/core/src/epicli/cli/epicli.py +++ b/core/src/epicli/cli/epicli.py @@ -227,7 +227,9 @@ def delete_parser(subparsers): sub_parser._action_groups.append(optional) def run_delete(args): - if not query_yes_no('Do you really want to delete your cluster?'): + if not query_yes_no('''You are trying to delete your cluster. +If your configuration does not allow to keep the existing disks used in the cluster, you will lose your data. +Make sure your data is safe. Do you really want to delete your cluster?'''): return 0 adjust_paths_from_build(args) with DeleteEngine(args) as engine: diff --git a/core/src/epicli/cli/models/AnsibleHostModel.py b/core/src/epicli/cli/models/AnsibleHostModel.py index 2c0d89e1b3..10874d84e1 100644 --- a/core/src/epicli/cli/models/AnsibleHostModel.py +++ b/core/src/epicli/cli/models/AnsibleHostModel.py @@ -1,4 +1,20 @@ class AnsibleHostModel: - def __init__(self, name, ip): - self.name = name - self.ip = ip + def __init__(self, name: str, ip: str): + self.name: str = name + self.ip: str = ip + + def __eq__(self, other) -> bool: + return (self.name == other.name and + self.ip == other.ip) + + def __lt__(self, other) -> bool: + pass + + +class AnsibleOrderedHostModel(AnsibleHostModel): + """ + Sortable variant of AnsibleHostModel + """ + + def __lt__(self, other) -> bool: + return self.name < other.name diff --git a/core/src/epicli/data/azure/defaults/infrastructure/virtual-machine.yml b/core/src/epicli/data/azure/defaults/infrastructure/virtual-machine.yml index 049b171997..d3081dc671 100644 --- a/core/src/epicli/data/azure/defaults/infrastructure/virtual-machine.yml +++ 
b/core/src/epicli/data/azure/defaults/infrastructure/virtual-machine.yml @@ -20,7 +20,6 @@ specification: sku: 18.04-LTS version: "18.04.202103151" # Never put latest on anything! Need to always pin the version number but testing we can get away with it storage_os_disk: - delete_on_termination: false managed: false caching: ReadWrite create_option: FromImage diff --git a/core/src/epicli/data/azure/terraform/infrastructure/virtual-machine.j2 b/core/src/epicli/data/azure/terraform/infrastructure/virtual-machine.j2 index 2e81de148d..bd38fd338e 100644 --- a/core/src/epicli/data/azure/terraform/infrastructure/virtual-machine.j2 +++ b/core/src/epicli/data/azure/terraform/infrastructure/virtual-machine.j2 @@ -49,10 +49,6 @@ resource "azurerm_virtual_machine" "{{ specification.name }}" { # windows specific stuff here maybe... hopefully never. {%- endif %} - {%- if specification.storage_os_disk.managed != true %} - delete_os_disk_on_termination = "{{ specification.storage_os_disk.delete_on_termination | lower }}" - {%- endif %} - storage_os_disk { name = "{{ specification.name }}-os-disk" caching = "{{ specification.storage_os_disk.caching }}" diff --git a/core/src/epicli/data/azure/validation/infrastructure/virtual-machine.yml b/core/src/epicli/data/azure/validation/infrastructure/virtual-machine.yml index 8ba3dc9c92..7552e38a6e 100644 --- a/core/src/epicli/data/azure/validation/infrastructure/virtual-machine.yml +++ b/core/src/epicli/data/azure/validation/infrastructure/virtual-machine.yml @@ -41,8 +41,6 @@ properties: storage_os_disk: type: object properties: - delete_on_termination: - type: boolean managed: type: boolean caching: diff --git a/core/src/epicli/data/common/ansible/playbooks/roles/common/tasks/main.yml b/core/src/epicli/data/common/ansible/playbooks/roles/common/tasks/main.yml index f920de7f25..a5c99eede9 100644 --- a/core/src/epicli/data/common/ansible/playbooks/roles/common/tasks/main.yml +++ 
b/core/src/epicli/data/common/ansible/playbooks/roles/common/tasks/main.yml @@ -26,11 +26,11 @@ - sysctl - name: Remove swap from /etc/fstab - mount: - backup: yes - fstype: swap - path: swap + lineinfile: + path: /etc/fstab + regexp: ^(?!#).*\sswap\s.*\d\s+\d$ state: absent + backup: yes tags: - disableswap diff --git a/core/src/epicli/data/common/ansible/playbooks/roles/preflight/tasks/check-routing.yml b/core/src/epicli/data/common/ansible/playbooks/roles/preflight/tasks/check-routing.yml new file mode 100644 index 0000000000..fde5103234 --- /dev/null +++ b/core/src/epicli/data/common/ansible/playbooks/roles/preflight/tasks/check-routing.yml @@ -0,0 +1,46 @@ +--- +- name: Check routing configuration + become: true + command: ip route show default 0.0.0.0/0 + register: ip_route_show_default + +- name: Assert default route exists + assert: + that: ip_route_show_default.stdout_lines | length > 0 + fail_msg: >- + No default route configured. At least one is required, read more in troubleshooting document. + quiet: true + +- name: Validate metric values if multiple default routes exist + when: ip_route_show_default.stdout_lines | length > 1 + block: + - name: Get metric values + become: true + shell: |- + set -o pipefail && \ + ip route show default 0.0.0.0/0 | awk '{if (! /metric/) print 0; else for (x=1;x- + At least two default routes have the same metric value. + Check routing configuration, read more in troubleshooting document. + +- include_vars: + file: roles/common/vars/main.yml + name: common_vars + +- name: Validate if ansible_default_ipv4.address matches address from inventory + when: + - common_vars.provider == "any" + - common_vars.specification.cloud is undefined + assert: + that: ansible_default_ipv4.address == ansible_host + fail_msg: >- + ansible_default_ipv4.address is {{ ansible_default_ipv4.address }} but inventory uses ip: {{ ansible_host }}. + Check default routing configuration, read more in troubleshooting document. 
+ quiet: true diff --git a/core/src/epicli/data/common/ansible/playbooks/roles/preflight/tasks/main.yml b/core/src/epicli/data/common/ansible/playbooks/roles/preflight/tasks/main.yml index 9dfc568d1b..17a8c98bdf 100644 --- a/core/src/epicli/data/common/ansible/playbooks/roles/preflight/tasks/main.yml +++ b/core/src/epicli/data/common/ansible/playbooks/roles/preflight/tasks/main.yml @@ -3,6 +3,8 @@ - include_tasks: check-os.yml +- include_tasks: check-routing.yml + - name: Check if existing PostgreSQL needs to be migrated to distribution installed from PostgreSQL repository block: diff --git a/core/src/epicli/data/common/defaults/configuration/image-registry.yml b/core/src/epicli/data/common/defaults/configuration/image-registry.yml index d8e4b3846d..d2ef666457 100644 --- a/core/src/epicli/data/common/defaults/configuration/image-registry.yml +++ b/core/src/epicli/data/common/defaults/configuration/image-registry.yml @@ -74,18 +74,22 @@ specification: file_name: kube-scheduler-v1.14.6.tar - name: "k8s.gcr.io/kube-proxy:v1.14.6" file_name: kube-proxy-v1.14.6.tar - - name: "k8s.gcr.io/etcd:3.3.10" - file_name: etcd-3.3.10.tar - - name: "k8s.gcr.io/coredns:1.3.1" - file_name: coredns-1.3.1.tar + # Disable this dual entry because of validation issues. + # We leave it for reference. + # - name: "k8s.gcr.io/etcd:3.3.10" + # file_name: etcd-3.3.10.tar + # - name: "k8s.gcr.io/coredns:1.3.1" + # file_name: coredns-1.3.1.tar - name: "coredns/coredns:1.5.0" file_name: coredns-1.5.0.tar - - name: "k8s.gcr.io/pause:3.1" - file_name: pause-3.1.tar - - name: "quay.io/coreos/flannel:v0.11.0-amd64" - file_name: flannel-v0.11.0-amd64.tar - - name: "quay.io/coreos/flannel:v0.11.0" - file_name: flannel-v0.11.0.tar + # Disable this dual entry because of validation issues. 
+ # We leave it for reference + # - name: "k8s.gcr.io/pause:3.1" + # file_name: pause-3.1.tar + # - name: "quay.io/coreos/flannel:v0.11.0-amd64" + # file_name: flannel-v0.11.0-amd64.tar + # - name: "quay.io/coreos/flannel:v0.11.0" + # file_name: flannel-v0.11.0.tar - name: "calico/cni:v3.8.1" file_name: cni-v3.8.1.tar - name: "calico/kube-controllers:v3.8.1" @@ -129,12 +133,14 @@ specification: file_name: kube-scheduler-v1.17.4.tar - name: "k8s.gcr.io/kube-proxy:v1.17.4" file_name: kube-proxy-v1.17.4.tar - - name: "k8s.gcr.io/etcd:3.4.3-0" - file_name: etcd-3.4.3-0.tar - - name: "k8s.gcr.io/coredns:1.6.5" - file_name: coredns-1.6.5.tar - - name: "k8s.gcr.io/pause:3.1" - file_name: pause-3.1.tar + # Disable this dual entry because of validation issues. + # We leave it for reference. + # - name: "k8s.gcr.io/etcd:3.4.3-0" + # file_name: etcd-3.4.3-0.tar + # - name: "k8s.gcr.io/coredns:1.6.5" + # file_name: coredns-1.6.5.tar + # - name: "k8s.gcr.io/pause:3.1" + # file_name: pause-3.1.tar # flannel - name: "quay.io/coreos/flannel:v0.11.0-amd64" file_name: flannel-v0.11.0-amd64.tar diff --git a/core/src/epicli/tests/__init__.py b/core/src/epicli/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/engine/__init__.py b/core/src/epicli/tests/engine/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/engine/providers/__init__.py b/core/src/epicli/tests/engine/providers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/engine/providers/any/__init__.py b/core/src/epicli/tests/engine/providers/any/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/engine/providers/any/test_APIProxy.py b/core/src/epicli/tests/engine/providers/any/test_APIProxy.py new file mode 100644 index 0000000000..88714739cf --- /dev/null +++ b/core/src/epicli/tests/engine/providers/any/test_APIProxy.py @@ -0,0 +1,26 @@ 
+from pytest_mock import MockerFixture + +from cli.engine.providers.any.APIProxy import APIProxy +from cli.models.AnsibleHostModel import AnsibleOrderedHostModel +from tests.engine.providers.data.APIProxy_data import CLUSTER_MODEL, CONFIG_DOC + + +def test_get_ips_for_feature(mocker: MockerFixture): + """ + Make sure that hostnames in inventory are sorted. + """ + + mocker.patch('cli.engine.providers.any.APIProxy.Log') + proxy = APIProxy(CLUSTER_MODEL('any'), CONFIG_DOC()) + + EXPECTED_RESULT = [ + AnsibleOrderedHostModel('service-vm-0', '20.73.105.240'), + AnsibleOrderedHostModel('service-vm-1', '20.73.105.188'), + AnsibleOrderedHostModel('service-vm-2', '20.73.105.18'), + AnsibleOrderedHostModel('service-vm-3', '20.73.105.33'), + AnsibleOrderedHostModel('service-vm-4', '20.73.105.54') + ] + + result = proxy.get_ips_for_feature('service') + + assert EXPECTED_RESULT == result diff --git a/core/src/epicli/tests/engine/providers/aws/__init__.py b/core/src/epicli/tests/engine/providers/aws/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/engine/providers/azure/__init__.py b/core/src/epicli/tests/engine/providers/azure/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/engine/providers/azure/test_APIProxy.py b/core/src/epicli/tests/engine/providers/azure/test_APIProxy.py new file mode 100644 index 0000000000..f1e8568a08 --- /dev/null +++ b/core/src/epicli/tests/engine/providers/azure/test_APIProxy.py @@ -0,0 +1,27 @@ +from pytest_mock import MockerFixture + +from cli.engine.providers.azure.APIProxy import APIProxy +from cli.models.AnsibleHostModel import AnsibleOrderedHostModel +from tests.engine.providers.data.APIProxy_data import CLUSTER_MODEL, RUNNING_INSTANCES_AZURE + + +def test_get_ips_for_feature(mocker: MockerFixture): + """ + Make sure that hostnames in inventory are sorted. 
+ """ + + mocker.patch('cli.engine.providers.azure.APIProxy.Log') + proxy = APIProxy(CLUSTER_MODEL('azure'), RUNNING_INSTANCES_AZURE) + proxy.run = (lambda *args: RUNNING_INSTANCES_AZURE) # mock run with prepared data + + EXPECTED_RESULT = [ + AnsibleOrderedHostModel('prefix-cluster-service-vm-0', '20.73.105.240'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-1', '20.73.105.188'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-2', '20.73.105.18'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-3', '20.73.105.33'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-4', '20.73.105.54') + ] + + result = proxy.get_ips_for_feature('service') + + assert EXPECTED_RESULT == result diff --git a/core/src/epicli/tests/engine/providers/data/APIProxy_data.py b/core/src/epicli/tests/engine/providers/data/APIProxy_data.py new file mode 100644 index 0000000000..29a110d72e --- /dev/null +++ b/core/src/epicli/tests/engine/providers/data/APIProxy_data.py @@ -0,0 +1,194 @@ +from typing import Dict, List + +from cli.helpers.ObjDict import ObjDict +from cli.helpers.objdict_helpers import dict_to_objdict + + +def CONFIG_DOC() -> ObjDict: + return dict_to_objdict([ + { + 'kind': 'infrastructure/machine', + 'title': 'Virtual Machine Infra', + 'provider': 'any', + 'name': 'service-0', + 'specification': { + 'ip': '20.73.105.18', + 'hostname': 'service-vm-2' + }, + 'version': '1.3.0dev' + }, + { + 'kind': 'infrastructure/machine', + 'title': 'Virtual Machine Infra', + 'provider': 'any', + 'name': 'service-1', + 'specification': { + 'ip': '20.73.105.54', + 'hostname': 'service-vm-4' + }, + 'version': '1.3.0dev' + }, + { + 'kind': 'infrastructure/machine', + 'title': 'Virtual Machine Infra', + 'provider': 'any', + 'name': 'service-2', + 'specification': { + 'ip': '20.73.105.188', + 'hostname': 'service-vm-1' + }, + 'version': '1.3.0dev' + }, + { + 'kind': 'infrastructure/machine', + 'title': 'Virtual Machine Infra', + 'provider': 'any', + 'name': 'service-3', + 
'specification': { + 'ip': '20.73.105.240', + 'hostname': 'service-vm-0' + }, + 'version': '1.3.0dev' + }, + { + 'kind': 'infrastructure/machine', + 'title': 'Virtual Machine Infra', + 'provider': 'any', + 'name': 'service-4', + 'specification': { + 'ip': '20.73.105.33', + 'hostname': 'service-vm-3' + }, + 'version': '1.3.0dev' + }, + ]) + + +def CLUSTER_MODEL(provider: str) -> ObjDict: + return dict_to_objdict({ + 'kind': 'epiphany-cluster', + 'title': 'Epiphany cluster Config', + 'provider': f'{provider}', + 'name': 'default', + 'specification': { + 'prefix': 'prefix', + 'name': 'cluster', + 'admin_user': { + 'name': 'username', + 'key_path': '/path/to/key' + }, + 'cloud': { + 'k8s_as_cloud_service': False, + 'subscription_name': 'Subscription Name', + 'vnet_address_pool': '10.1.0.0/20', + 'use_public_ips': True, + 'use_service_principal': False, + 'region': 'West Europe', + 'network': {'use_network_security_groups': True}, + 'default_os_image': 'default', + 'hostname_domain_extension': '' + }, + 'components': { + 'service': { + 'count': 5, + 'machine': 'service-machine', + 'configuration': 'default', + 'subnets': [{'address_pool': '10.1.8.0/24'}], + 'machines': ['service-0', + 'service-1', + 'service-2', + 'service-3', + 'service-4'] + } + } + }, + 'version': '1.3.0dev' + }) + +RUNNING_INSTANCES_AZURE: List[List[Dict]] = [ + [ + {'virtualMachine': { + 'name': 'prefix-cluster-service-vm-0', + 'network': { + 'privateIpAddresses': ['10.1.8.6'], + 'publicIpAddresses': [ + {'id': '/subscriptions/subscription_hash/resourceGroups/prefix-cluster-rg/providers/Microsoft.Network/publicIPAddresses/prefix-cluster-service-pubip-0', + 'ipAddress': '20.73.105.240', + 'ipAllocationMethod': 'Static', + 'name': 'prefix-cluster-service-pubip-0', + 'resourceGroup': 'prefix-cluster-rg', + 'zone': '1'} + ] + }, + 'resourceGroup': 'prefix-cluster-rg'} + } + ], + [ + {'virtualMachine': { + 'name': 'prefix-cluster-service-vm-2', + 'network': { + 'privateIpAddresses': ['10.1.8.5'], + 
'publicIpAddresses': [ + {'id': '/subscriptions/subscription_hash/resourceGroups/prefix-cluster-rg/providers/Microsoft.Network/publicIPAddresses/prefix-cluster-service-pubip-2', + 'ipAddress': '20.73.105.18', + 'ipAllocationMethod': 'Static', + 'name': 'prefix-cluster-service-pubip-2', + 'resourceGroup': 'prefix-cluster-rg', + 'zone': '1'} + ] + }, + 'resourceGroup': 'prefix-cluster-rg'} + } + ], + [ + {'virtualMachine': { + 'name': 'prefix-cluster-service-vm-1', + 'network': { + 'privateIpAddresses': ['10.1.8.4'], + 'publicIpAddresses': [ + {'id': '/subscriptions/subscription_hash/resourceGroups/prefix-cluster-rg/providers/Microsoft.Network/publicIPAddresses/prefix-cluster-service-pubip-2', + 'ipAddress': '20.73.105.188', + 'ipAllocationMethod': 'Static', + 'name': 'prefix-cluster-service-pubip-1', + 'resourceGroup': 'prefix-cluster-rg', + 'zone': '1'} + ] + }, + 'resourceGroup': 'prefix-cluster-rg'} + } + ], + [ + {'virtualMachine': { + 'name': 'prefix-cluster-service-vm-4', + 'network': { + 'privateIpAddresses': ['10.1.8.3'], + 'publicIpAddresses': [ + {'id': '/subscriptions/subscription_hash/resourceGroups/prefix-cluster-rg/providers/Microsoft.Network/publicIPAddresses/prefix-cluster-service-pubip-2', + 'ipAddress': '20.73.105.54', + 'ipAllocationMethod': 'Static', + 'name': 'prefix-cluster-service-pubip-4', + 'resourceGroup': 'prefix-cluster-rg', + 'zone': '1'} + ] + }, + 'resourceGroup': 'prefix-cluster-rg'} + } + ], + [ + {'virtualMachine': { + 'name': 'prefix-cluster-service-vm-3', + 'network': { + 'privateIpAddresses': ['10.1.8.2'], + 'publicIpAddresses': [ + {'id': '/subscriptions/subscription_hash/resourceGroups/prefix-cluster-rg/providers/Microsoft.Network/publicIPAddresses/prefix-cluster-service-pubip-2', + 'ipAddress': '20.73.105.33', + 'ipAllocationMethod': 'Static', + 'name': 'prefix-cluster-service-pubip-3', + 'resourceGroup': 'prefix-cluster-rg', + 'zone': '1'} + ] + }, + 'resourceGroup': 'prefix-cluster-rg'} + } + ] +] diff --git 
a/core/src/epicli/tests/helpers/__init__.py b/core/src/epicli/tests/helpers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/models/__init__.py b/core/src/epicli/tests/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/core/src/epicli/tests/models/test_AnsibleHostModel.py b/core/src/epicli/tests/models/test_AnsibleHostModel.py new file mode 100644 index 0000000000..432e70ecd4 --- /dev/null +++ b/core/src/epicli/tests/models/test_AnsibleHostModel.py @@ -0,0 +1,29 @@ +from typing import List + +from cli.models.AnsibleHostModel import AnsibleOrderedHostModel + + +def test_sort(): + """ + Test the `less` operator + """ + + EXPECTED_HOSTS: List[AnsibleOrderedHostModel] = [ + AnsibleOrderedHostModel('prefix-cluster-service-vm-0', '20.82.14.10'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-1', '20.82.14.34'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-2', '20.82.14.101'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-3', '20.82.14.67'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-4', '20.82.14.11'), + ] + + unordered_hosts: List[AnsibleOrderedHostModel] = [ + AnsibleOrderedHostModel('prefix-cluster-service-vm-4', '20.82.14.11'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-1', '20.82.14.34'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-3', '20.82.14.67'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-0', '20.82.14.10'), + AnsibleOrderedHostModel('prefix-cluster-service-vm-2', '20.82.14.101') + ] + + unordered_hosts.sort() + + assert EXPECTED_HOSTS == unordered_hosts diff --git a/docs/home/HOWTO.md b/docs/home/HOWTO.md index 546593a3fb..93c3c57850 100644 --- a/docs/home/HOWTO.md +++ b/docs/home/HOWTO.md @@ -27,6 +27,7 @@ - [Build artifacts](./howto/CLUSTER.md#build-artifacts) - [Kafka replication and partition setting](./howto/CLUSTER.md#kafka-replication-and-partition-setting) - [RabbitMQ installation and 
setting](./howto/CLUSTER.md#rabbitmq-installation-and-setting) + - [How to provide additional custom Terraform templates](./howto/CLUSTER.md#how-to-provide-additional-custom-terraform-templates) - [Monitoring](./howto/MONITORING.md) - [How to configure Prometheus alerts](./howto/MONITORING.md#how-to-configure-prometheus-alerts) diff --git a/docs/home/TROUBLESHOOTING.md b/docs/home/TROUBLESHOOTING.md index 09736bc621..780d1f36d2 100644 --- a/docs/home/TROUBLESHOOTING.md +++ b/docs/home/TROUBLESHOOTING.md @@ -48,3 +48,16 @@ ERROR org.apache.kafka.common.errors.InvalidReplicationFactorException: Replicat This issue is saying the a replication of 1 is being attempted but there are no brokers '0'. This means that the kafka broker(s) are not running any longer. Kafka will start and attempt to establish connections etc. and if unable it will shutdown and log the message. So, when the verification script runs it will not be able to find a local broker (runs on each broker). Take a look at syslog/dmesg and run `sudo systemctl status kafka`. Most likely it is related to security (TLS/SSL) and/or network but it can also be incorrect settings in the config file `/opt/kafka/config/server.properties`. Correct and rerun the automation. + +## Networking + +Epicli uses Ansible to configure machines in the cluster. Several tasks in Epiphany rely on the ```ansible_default_ipv4``` variable. +In some specific configurations (mostly on-prem), this variable might be resolved incorrectly. Those cases are: +- more than one network interface per machine, +- changes in hardware configuration (add or remove network interface / rename interface), +- missing, wrong, or multiple default routing configurations. + +When ```ansible_default_ipv4``` is not equal to the machine IP address used in the inventory, the installation fails with a relevant error message. + +This means that the machine's default routing configuration needs to be modified to use the same network interface (and IP address) that is used in the inventory file.
+Here you can read more about [routing configuration](http://linux-ip.net/html/basic-changing.html#basic-changing-default). diff --git a/docs/home/howto/CLUSTER.md b/docs/home/howto/CLUSTER.md index 6a32dc814f..5b5af08909 100644 --- a/docs/home/howto/CLUSTER.md +++ b/docs/home/howto/CLUSTER.md @@ -569,6 +569,11 @@ Epicli has a delete command to remove a cluster from a cloud provider (AWS, Azur From the defined cluster build folder it will take the information needed to remove the resources from the cloud provider. +### Note for Azure cloud provider + +Make sure you can safely remove OS and data disks - Epiphany does not support cluster removal from Azure +while preserving existing disks. + ## Single machine cluster *Please read first prerequisites related to [hostname requirements](./PREREQUISITES.md#hostname-requirements).* @@ -941,6 +946,33 @@ You can read more [here](https://www.confluent.io/blog/how-choose-number-topics- To install RabbitMQ in single mode just add rabbitmq role to your data.yaml for your server and in general roles section. All configuration on RabbitMQ - e.g. user other than guest creation should be performed manually. +## How to provide additional custom Terraform templates + +For both cloud providers (AWS, Azure) Epicli generates the following Terraform components for deploying a cluster: + +- VPC (AWS) or VNet (Azure) +- Subnets inside the VPC or VNet +- Security rules between the subnets +- Virtual machines with network interfaces deployed in the different subnets + +Sometimes it is required to add further resources, such as VPN access or other cloud-native resources like EKS or AKS, to this infrastructure. Epiphany gives the user the ability to add these additional resources during or after the cluster creation.
+ +The Terraform scripts Epicli generates will have the following naming convention: + +```shell +xxx_resource-name-nr.tf +``` + +And will be placed in the following folder: + +```shell +/shared/build/clustername/terraform +``` + +When Epicli is run/re-run, any Terraform scripts matching the ```xxx_*.tf``` pattern (where ```xxx``` is a three-digit number) will be removed and regenerated. The user can create custom Terraform scripts and place them alongside the Epicli generated ones, and these will be applied/re-applied during the Epicli run. + +If you need to define any additional security rules for component subnets for custom infrastructure you can check the documentation [here](./SECURITY_GROUPS.md). + ## How to use Azure availability sets In your cluster yaml config declare as many as required objects of kind `infrastructure/availability-set` like