From 91a92302f72c754cdef1f1ca9d5f55cbbd5773fc Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Mon, 1 Nov 2021 14:53:40 -0400 Subject: [PATCH 1/3] Only run kube_monitor threads on master MiqServer On podified there are multiple miq_server instances but only one "master". All miq_servers run on the same kubernetes cluster and thus only have to run one set of monitor_pods/monitor_deployments threads to update the global current_pods/current_deployments hashes. --- app/models/miq_server/worker_management/kubernetes.rb | 11 ++++++++++- .../miq_server/worker_management/kubernetes_spec.rb | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/app/models/miq_server/worker_management/kubernetes.rb b/app/models/miq_server/worker_management/kubernetes.rb index 3f949b6018a..9474acf78af 100644 --- a/app/models/miq_server/worker_management/kubernetes.rb +++ b/app/models/miq_server/worker_management/kubernetes.rb @@ -14,8 +14,15 @@ def sync_monitor end def sync_from_system - ensure_kube_monitors_started + # All miq_server instances have to reside on the same Kubernetes cluster, so + # we only have to sync the list of pods and deployments once + ensure_kube_monitors_started if my_server.is_master? + + # Before syncing the workers check for any orphaned worker rows that don't have + # a current pod and delete them cleanup_orphaned_worker_rows + + # Update worker deployments with updated settings such as cpu/memory limits sync_deployment_settings end @@ -131,6 +138,8 @@ def ensure_kube_monitors_started end def delete_failed_deployments + return unless my_server.is_master? + failed_deployments.each do |failed| orchestrator.delete_deployment(failed) end diff --git a/spec/models/miq_server/worker_management/kubernetes_spec.rb b/spec/models/miq_server/worker_management/kubernetes_spec.rb index ac619d08f9d..20fdaf8898d 100644 --- a/spec/models/miq_server/worker_management/kubernetes_spec.rb +++ b/spec/models/miq_server/worker_management/kubernetes_spec.rb @@ -1,7 +1,7 @@ require 'recursive-open-struct' RSpec.describe MiqServer::WorkerManagement::Kubernetes do - let(:server) { EvmSpecHelper.create_guid_miq_server_zone.second } + let(:server) { EvmSpecHelper.local_miq_server(:is_master => true) } let(:orchestrator) { double("ContainerOrchestrator") } let(:deployment_name) { '1-generic-79bb8b8bb5-8ggbg' } let(:pod_label) { '1-generic' } From 17ed688a02e3ea0e829ce77748fc0d51efadfcd6 Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Tue, 2 Nov 2021 11:24:12 -0400 Subject: [PATCH 2/3] Don't rely on is_master? on podified --- app/models/miq_server/worker_management/kubernetes.rb | 10 ++++++++-- .../miq_server/worker_management/kubernetes_spec.rb | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/app/models/miq_server/worker_management/kubernetes.rb b/app/models/miq_server/worker_management/kubernetes.rb index 9474acf78af..a37b27901a4 100644 --- a/app/models/miq_server/worker_management/kubernetes.rb +++ b/app/models/miq_server/worker_management/kubernetes.rb @@ -16,7 +16,7 @@ def sync_monitor def sync_from_system # All miq_server instances have to reside on the same Kubernetes cluster, so # we only have to sync the list of pods and deployments once - ensure_kube_monitors_started if my_server.is_master? + ensure_kube_monitors_started if my_server_is_primary? # Before syncing the workers check for any orphaned worker rows that don't have # a current pod and delete them @@ -94,6 +94,12 @@ def constraints_changed?(current, desired) private + # In podified there is only one "primary" miq_server whose zone is "default", the + # other miq_server instances are simply to allow for additional zones + def my_server_is_primary? + my_server.zone&.name == "default" + end + def cpu_value_eql?(current, desired) # Convert to millicores if not already converted: "1" -> 1000; "1000m" -> 1000 current = current.to_s[-1] == "m" ? current.to_f : current.to_f * 1000 @@ -138,7 +144,7 @@ def ensure_kube_monitors_started end def delete_failed_deployments - return unless my_server.is_master? + return unless my_server_is_primary? failed_deployments.each do |failed| orchestrator.delete_deployment(failed) diff --git a/spec/models/miq_server/worker_management/kubernetes_spec.rb b/spec/models/miq_server/worker_management/kubernetes_spec.rb index 20fdaf8898d..bbfa7319332 100644 --- a/spec/models/miq_server/worker_management/kubernetes_spec.rb +++ b/spec/models/miq_server/worker_management/kubernetes_spec.rb @@ -1,7 +1,7 @@ require 'recursive-open-struct' RSpec.describe MiqServer::WorkerManagement::Kubernetes do - let(:server) { EvmSpecHelper.local_miq_server(:is_master => true) } + let(:server) { FactoryBot.create(:miq_server_in_default_zone).tap { |s| EvmSpecHelper.stub_as_local_server(s) } } let(:orchestrator) { double("ContainerOrchestrator") } let(:deployment_name) { '1-generic-79bb8b8bb5-8ggbg' } let(:pod_label) { '1-generic' } From 3773816f6066f7ecfbaf13bef3d7091729b923fe Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Tue, 2 Nov 2021 12:21:56 -0400 Subject: [PATCH 3/3] Ensure primary miq_server is first in servers_from_db --- lib/workers/evm_server.rb | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/workers/evm_server.rb b/lib/workers/evm_server.rb index ce51b445d72..1a30ead7246 100644 --- a/lib/workers/evm_server.rb +++ b/lib/workers/evm_server.rb @@ -87,7 +87,16 @@ def self.start(*args) private def servers_from_db - MiqEnvironment::Command.is_podified? ? MiqServer.in_my_region.to_a : [MiqServer.my_server(true)].compact + my_server = MiqServer.my_server(true) + + if MiqEnvironment::Command.is_podified? + # Ensure that the "primary" miq_server is first in the list of servers. + # This ensures that any work that is only done on the primary is completed + # before processing the rest of the servers. + MiqServer.in_my_region.to_a.unshift(my_server).uniq.compact + else + [my_server].compact + end end def set_process_title