From fc76af2427c5155953362ef67bb8665387d30778 Mon Sep 17 00:00:00 2001 From: Keenan Brock Date: Thu, 25 Jan 2018 18:44:45 -0500 Subject: [PATCH] WIP Add ems centric capture to metrics_capture TODO: move from capture to metrics_capture.rb THEN refactor --- .../providers/base_manager/metrics_capture.rb | 212 +++++++++++- app/models/metric/capture.rb | 220 ++----------- app/models/metric/ci_mixin/capture.rb | 10 + app/models/metric/ci_mixin/rollup.rb | 6 +- app/models/miq_schedule_worker/jobs.rb | 10 +- config/settings.yml | 9 - spec/models/metric/capture_spec.rb | 121 ++++--- spec/models/metric/ci_mixin/capture_spec.rb | 102 ------ spec/models/metric_spec.rb | 310 +----------------- 9 files changed, 329 insertions(+), 671 deletions(-) diff --git a/app/models/manageiq/providers/base_manager/metrics_capture.rb b/app/models/manageiq/providers/base_manager/metrics_capture.rb index 1877c6b64fb..ff775c5bd46 100644 --- a/app/models/manageiq/providers/base_manager/metrics_capture.rb +++ b/app/models/manageiq/providers/base_manager/metrics_capture.rb @@ -1,8 +1,216 @@ class ManageIQ::Providers::BaseManager::MetricsCapture + HOURLY_METRICS_DURATION = 4.hours include Vmdb::Logging - attr_reader :target - def initialize(target) + attr_reader :target, :ems + def initialize(target, ems = nil) @target = target + @ems = ems + end + + # New method to collect metrics. 
It is ems centric + # It collects all metrics that are enabled + # @option options [Boolean] :exclude_storages if true, will not include storages + def perf_collect_all_metrics(interval_name, start_time = nil, end_time = nil, options = {}) + grouped_targets(ems, options).each do |target_class, targets| + case target_class + when "Storage" + perf_collect_storages(targets, interval_name, start_time, end_time) + when "Host" + perf_collect_hosts(targets, interval_name, start_time, end_time) + else + perf_collect_targets(targets, interval_name, start_time, end_time) + end + end + self + end + + private + + # Determine possible collection targets grouped by target type (base class name) + # + # @param [ExtManagementSystem] ems + # @option options [Boolean] :exclude_storages Skip collection of storages - used for gap collection + # @raise [RuntimeError] "Unknown ems type" if the ems is not infra, cloud, or container + def grouped_targets(ems, options = {}) + all_capture_targets(ems, options).group_by { |t| t.class.base_class.name } + end + + # Determine targets for an ems + # + # @param [ExtManagementSystem] ems + # @option options [Boolean] :exclude_storages Skip collection of storages - used for gap collection + # + def all_capture_targets(ems, options = {}) + if ems.kind_of?(::ManageIQ::Providers::InfraManager) + Metric::Targets.capture_infra_targets([ems], options) + elsif ems.kind_of?(::ManageIQ::Providers::CloudManager) + Metric::Targets.capture_cloud_targets([ems], options) + elsif ems.kind_of?(::ManageIQ::Providers::ContainerManager) + Metric::Targets.capture_container_targets([ems], options) + else + raise "Unknown ems type #{ems.class.name}" + end + end + + def perf_collect_storages(targets, interval_name, start_time, end_time) + capture_interval = %w(realtime historical).include?(interval_name) ? 
"hourly" : interval_name + targets.each do |target| + # NOTE: storage is not fetched from provider + # Storage#perf_capture is really just perf_process + capture_start, capture_end = fix_capture_start_end_time(interval_name, target, start_time, end_time) + perf_process_queue(target, "perf_capture_#{capture_interval}", capture_interval, capture_start, capture_end) + end + end + + def perf_collect_hosts(targets, interval_name, start_time, end_time) + # assuming there are fewer than $query_size hosts per cluster - (typically it is < 40) + targets.group_by(&:ems_cluster_id).each do |parent_id, targets_in_cluster| + start_range, end_range, counters_data = capture_targets(targets_in_cluster, interval_name, start_time, end_time) + perf_process_queue(ems, "perf_process", interval_name, start_range, end_range, counters_data, parent_id) + end + end + + def perf_collect_targets(targets, interval_name, start_time, end_time) + query_size = Metric::Capture.concurrent_requests(interval_name) + targets.each_slice(query_size).each do |target_group| + start_range, end_range, counters_data = capture_targets(target_group, interval_name, start_time, end_time) + perf_process_queue(ems, "perf_process", interval_name, start_range, end_range, counters_data) + end + end + + # Perform capture on list of targets + # @param targets [Array] array of targets to be captured + # @param start_time [String, nil] Start time - typically nil. Has a value for gap collection + # @param end_time [String, nil] Ending time - typically nil. Has a value for gap collection + # [{ + # :ems_id => ems_id, # "2" + # :ems_ref => target.ems_ref, # "vm-55" + # :ems_klass => ("Vm", "Host", "Storage") + # :interval_name => ("realtime", "hourly") + # :start_range => start_time, + # :end_range => end_time, + # :counters => { + # "#{counter_id}_#{instance}" => { # e.g.: "143_" => + # :counter_key => :"#{group}_#{name}_#{stats}_#{rollup}", # :net_usage_rate_average + # :rollup => ("realtime", ...) 
+ # :precision => (0.1, 1), + # :unit_key => ("percent", "kiloBytesPerSecond"), + # :vim_key => counter_id, # "143" + # :instance => instance, # "" + # :capture_interval => interval, # "20" + # :capture_interval_name => ("realtime", "hourly"), + # } + # }, + # :counter_values => { + # timestamp => { # "2017-09-25T19:55:20Z" + # "#{counter_id}_#{instance}" => Numeric # "143_" => 0 + # } + # } + # }] + # @return [Array] [start_range, end_range, counter_data] - the earliest start, the latest end, and the merged counter data of all targets that were captured + + def capture_targets(targets, interval_name, start_time, end_time) + all_start_range = nil + all_end_range = nil + all_counter_data = {} + targets.each do |target| + begin + capture_start, capture_end = fix_capture_start_end_time(interval_name, target, start_time, end_time) + + if interval_name == "realtime" + # TODO: do proper rounding. + # ? is end_time + # ? pass in start or object.last_perf_capture_on + # ? what is the end of the historical - is it 4 hours ago? is it end_time? + gap_start, gap_end = fix_capture_start_end_time("historical", target, nil, capture_start || end_time) + if (gap_start && gap_start < gap_end) + start_range, end_range, counter_data = perf_capture_one(target, "historical", gap_start, gap_end) + if counter_data + all_counter_data.merge!(counter_data) + all_start_range = start_range if all_start_range.nil? || all_start_range > start_range + all_end_range = end_range if all_end_range.nil? || all_end_range < end_range + end + end + end + + start_range, end_range, counter_data = perf_capture_one(target, interval_name, capture_start, capture_end) + if counter_data + all_counter_data.merge!(counter_data) + all_start_range = start_range if all_start_range.nil? || all_start_range > start_range + all_end_range = end_range if all_end_range.nil? 
|| all_end_range < end_range + end + rescue => e + _log.warn("Issue capturing metrics for #{target.class.name}:#{target.id} skipping: #{e.message}") + end + end + + [all_start_range, all_end_range, all_counter_data] + end + + # this method is used for tests + # It gives us an easy point to verify perf_capture requests were made. + def perf_capture_one(target, interval_name, start_time, end_time) + target.just_perf_capture(interval_name, start_time, end_time) + end + + def fix_capture_start_end_time(interval_name, object = nil, start_time = nil, end_time = nil) + start_time = start_time.utc unless start_time.nil? + end_time = end_time.utc unless end_time.nil? + + # Determine the start_time for capturing if not provided + if interval_name == 'historical' || interval_name == 'hourly' + start_time ||= object.last_perf_capture_on if object + # For hourly on the first capture, we don't want to get all of the + # historical data, so we shorten the query + start_time ||= Metric::Capture.historical_start_time + elsif object + start_time ||= object.last_perf_capture_on + else + start_time ||= ems.last_metrics_success_date + end + + if interval_name == 'realtime' && (start_time.nil? || start_time < HOURLY_METRICS_DURATION.ago.utc) + start_time = HOURLY_METRICS_DURATION.ago.utc + end + [start_time, end_time] + end + + def detect_gap(interval_name) + if interval_name == "realtime" && ems.last_metrics_success_date.nil? + gap_start_time, gap_end_time = fix_capture_start_end_time("historical", nil, nil, HOURLY_METRICS_DURATION.ago.utc) + ::Metric::Capture.perf_capture_gap_queue(gap_start_time, gap_end_time, ems) + end + end + + # queue perf processing. + # @param target [ExtManagementSystem, Storage] the object that receives the message. 
+ # @param method_name [String] "perf_process" for all but Storage, it uses "perf_capture_*" + # @param interval_name [String] + # @param rollup_id [String] The EmsCluster id if this is a Host and a rollup record is to be generated + def perf_process_queue(target, method_name, interval_name, start_time, end_time, counters_data = nil, rollup_id = nil) + if rollup_id + miq_callback = { + :class_name => "EmsCluster", + :instance_id => rollup_id, + :method_name => "perf_rollup_range_cb", + :zone => ems.zone_name, + :role => 'ems_metrics_processor', + :queue_name => 'ems_metrics_processor', + :args => [start_time, end_time, interval_name, nil] + } + end + MiqQueue.put( + :class_name => target.class.name, + :method_name => method_name, + :instance_id => target.id, + :zone => ems.zone_name, + :role => 'ems_metrics_processor', + :queue_name => 'ems_metrics_processor', + :priority => MiqQueue::NORMAL_PRIORITY, + :args => [interval_name, start_time, end_time], + :data => counters_data, + :miq_callback => miq_callback + ) end end diff --git a/app/models/metric/capture.rb b/app/models/metric/capture.rb index b4de2ba7cc1..c644926161f 100644 --- a/app/models/metric/capture.rb +++ b/app/models/metric/capture.rb @@ -31,201 +31,47 @@ def self.concurrent_requests(interval_name) requests end - def self.standard_capture_threshold(target) - target_key = target.class.base_model.to_s.underscore.to_sym - minutes_ago(::Settings.performance.capture_threshold[target_key] || - ::Settings.performance.capture_threshold.default) - end + # legacy messages on the queue + # went away in 4.6 def self.perf_capture_timer(zone = nil) - _log.info("Queueing performance capture...") - - zone ||= MiqServer.my_server.zone - perf_capture_health_check(zone) - targets = Metric::Targets.capture_targets(zone) - - targets_by_rollup_parent = calc_targets_by_rollup_parent(targets) - target_options = calc_target_options(zone, targets_by_rollup_parent) - targets = filter_perf_capture_now(targets, target_options) - 
queue_captures(targets, target_options) - - # Purge tasks older than 4 hours - MiqTask.delete_older(4.hours.ago.utc, "name LIKE 'Performance rollup for %'") - - _log.info("Queueing performance capture...Complete") - end - - def self.perf_capture_gap(start_time, end_time, zone_id = nil) - _log.info("Queueing performance capture for range: [#{start_time} - #{end_time}]...") - - zone = Zone.find(zone_id) if zone_id - targets = Metric::Targets.capture_targets(zone, :exclude_storages => true) - targets.each { |target| target.perf_capture_queue('historical', :start_time => start_time, :end_time => end_time, :zone => zone) } - - _log.info("Queueing performance capture for range: [#{start_time} - #{end_time}]...Complete") end - def self.perf_capture_gap_queue(start_time, end_time, zone = nil) - item = { - :class_name => name, - :method_name => "perf_capture_gap", - :role => "ems_metrics_coordinator", - :priority => MiqQueue::HIGH_PRIORITY, - :args => [start_time, end_time, zone.try(:id)] - } - item[:zone] = zone.name if zone - - MiqQueue.put(item) - end - - def self.filter_perf_capture_now(targets, target_options) - targets.select do |target| - options = target_options[target] - # [:force] is set if we already determined this target needs perf capture - if options[:force] || perf_capture_now?(target) - true - else - _log.debug do - "Skipping capture of #{target.log_target} -" + - "Performance last captured on [#{target.last_perf_capture_on}] is within threshold" - end - false - end - end - end - - # if it has not been run, or it was a very long time ago, just run it - # if it has been run very recently (even too recently for realtime) then skip it - # otherwise, it needs to be run if it is realtime, but not if it is standard threshold - # assumes alert capture threshold <= standard capture threshold - def self.perf_capture_now?(target) - target.last_perf_capture_on.nil? 
|| - target.last_perf_capture_on < standard_capture_threshold(target) - end - - # - # Capture entry points - # - - def self.perf_capture_health_check(zone) - q_items = MiqQueue.select(:method_name, :created_on).order(:created_on) - .where(:state => "ready", - :role => "ems_metrics_collector", - :method_name => %w(perf_capture perf_capture_realtime perf_capture_hourly perf_capture_historical), - :zone => zone.name) - items_by_interval = q_items.group_by(&:method_name) - items_by_interval.reverse_merge!("perf_capture_realtime" => [], "perf_capture_hourly" => [], "perf_capture_historical" => []) - items_by_interval.each do |method_name, items| - interval = method_name.sub("perf_capture_", "") - msg = "#{items.length} #{interval.inspect} captures on the queue for zone [#{zone.name}]" - msg << " - oldest: [#{items.first.created_on.utc.iso8601}], recent: [#{items.last.created_on.utc.iso8601}]" if items.length > 0 - _log.info(msg) - end - end - private_class_method :perf_capture_health_check - - # Collect realtime targets and group them by their rollup parent - # - # 1. Only calculate rollups for Hosts - # 2. Some Hosts have an EmsCluster as a parent, others have none. - # 3. Only Hosts with a parent are rolled up. 
- # @param [Array] @targets The nodes to rollup - # @option options :force Force capture if this node is a host - # @returns Hash> - # e.g.: {"EmsCluster:4"=>[Host:4], "EmsCluster:5"=>[Host:1, Host:2]} - def self.calc_targets_by_rollup_parent(targets, options = {}) - realtime_targets = targets.select do |target| - target.kind_of?(Host) && - perf_target_to_interval_name(target) == "realtime" && - (options[:force] || perf_capture_now?(target)) - end - realtime_targets.each_with_object({}) do |target, h| - target.perf_rollup_parents("realtime").to_a.compact.each do |parent| - pkey = "#{parent.class}:#{parent.id}" - (h[pkey] ||= []) << target - end - end - end - - # Determine queue options for each target - # Is only generating options for Vmware Hosts, which have a task for rollups. - # The rest just set the zone - def self.calc_target_options(zone, targets_by_rollup_parent) - task_end_time = Time.now.utc.iso8601 - default_task_start_time = 1.hour.ago.utc.iso8601 - - target_options = Hash.new { |h, k| h[k] = {:zone => zone} } - # Create a new task for each rollup parent - # mark each target with the rollup parent - targets_by_rollup_parent.each_with_object(target_options) do |(pkey, targets), h| - name = "Performance rollup for #{pkey}" - prev_task = MiqTask.where(:identifier => pkey).order("id DESC").first - task_start_time = prev_task ? 
prev_task.context_data[:end] : default_task_start_time - - task = MiqTask.create( - :name => name, - :identifier => pkey, - :state => MiqTask::STATE_QUEUED, - :status => MiqTask::STATUS_OK, - :message => "Task has been queued", - :context_data => { - :start => task_start_time, - :end => task_end_time, - :parent => pkey, - :targets => targets.map { |target| "#{target.class}:#{target.id}" }, - :complete => [], - :interval => "realtime" - } + def self.perf_collect_all_metrics_queue(emses, interval_name = "realtime", start_time = nil, end_time = nil, options = {}) + emses.each do |ems| + MiqQueue.put_unless_exists( + :class_name => "Metric::Capture", + :method_name => "perf_collect_all_metrics", + :args => [ems.id, interval_name, start_time, end_time, options], + :role => "ems_metrics_collector", + :queue_name => ems.metrics_collector_queue_name, + :zone => ems.zone_name, + :priority => MiqQueue::HIGH_PRIORITY, + :state => %w(ready dequeue), ) - _log.info("Created task id: [#{task.id}] for: [#{pkey}] with targets: #{targets_by_rollup_parent[pkey].inspect} for time range: [#{task_start_time} - #{task_end_time}]") - targets.each do |target| - h[target] = { - :task_id => task.id, - :force => true, # Force collection since we've already verified that capture should be done now - :zone => zone, - } - end end end - private_class_method :calc_target_options - def self.queue_captures(targets, target_options) - # Queue the captures for each target - use_historical = historical_days != 0 - - targets.each do |target| - interval_name = perf_target_to_interval_name(target) - - options = target_options[target] - - begin - target.perf_capture_queue(interval_name, options) - if !target.kind_of?(Storage) && use_historical && target.last_perf_capture_on.nil? 
- target.perf_capture_queue('historical') - end - rescue => err - _log.warn("Failed to queue perf_capture for target [#{target.class.name}], [#{target.id}], [#{target.name}]: #{err}") - end - end + def self.perf_collect_all_metrics(ems, interval_name = "realtime", start_time = nil, end_time = nil, options = {}) + ems = ExtManagementSystem.find(ems) unless ems.kind_of?(ExtManagementSystem) + klass = ems.class::MetricsCapture + klass.new(nil, ems).perf_collect_all_metrics(interval_name, start_time, end_time, options) end - private_class_method :queue_captures - def self.perf_target_to_interval_name(target) - case target - when Host, VmOrTemplate then "realtime" - when ContainerNode, Container, ContainerGroup then "realtime" - when Storage then "hourly" - end - end - private_class_method :perf_target_to_interval_name - - def self.minutes_ago(value) - if value.kind_of?(Integer) # Default unit is minutes - value.minutes.ago.utc - elsif value.nil? - nil - else - value.to_i_with_method.seconds.ago.utc - end + def self.perf_capture_gap(start_time, end_time, zone_id = nil) + perf_capture_gap_queue(start_time, end_time, zone_id) + end + + # called by ui + # queue a historical (gap) perf capture for every ems of a zone, or for a single ems + # @param zone_or_ems [Zone, Integer, ExtManagementSystem] Legacy passes zone (or zone_id), but we're moving to ems + def self.perf_capture_gap_queue(start_time, end_time, zone_or_ems = nil) + emses = if zone_or_ems.kind_of?(ExtManagementSystem) + [zone_or_ems] + else + zone_or_ems = Zone.find(zone_or_ems) if zone_or_ems && !zone_or_ems.kind_of?(Zone) + zone_or_ems ||= MiqServer.my_server(true).zone + zone_or_ems.ext_management_systems + end + perf_collect_all_metrics_queue(emses, "historical", start_time, end_time, :exclude_storages => true) end - private_class_method :minutes_ago end diff --git a/app/models/metric/ci_mixin/capture.rb b/app/models/metric/ci_mixin/capture.rb index 76ef8bd993b..9ca4d35bc28 100644 --- a/app/models/metric/ci_mixin/capture.rb +++ 
b/app/models/metric/ci_mixin/capture.rb @@ -113,18 +113,26 @@ def perf_capture_queue(interval_name, options = {}) end end + # legacy miq_queue messages + # went away in 4.6 def perf_capture_realtime(*args) perf_capture('realtime', *args) end + # legacy miq_queue messages + # went away in 4.6 def perf_capture_hourly(*args) perf_capture('hourly', *args) end + # legacy miq_queue messages + # went away in 4.6 def perf_capture_historical(*args) perf_capture('historical', *args) end + # legacy miq_queue messages + # went away in 4.6 def perf_capture(interval_name, start_time = nil, end_time = nil) unless Metric::Capture::VALID_CAPTURE_INTERVALS.include?(interval_name) raise ArgumentError, _("invalid interval_name '%{name}'") % {:name => interval_name} @@ -232,6 +240,8 @@ def just_perf_capture(interval_name, start_time = nil, end_time = nil) [start_range, end_range, counters_data] end + # legacy miq_queue messages + # went away in 4.6 def perf_capture_callback(task_ids, _status, _message, _result) tasks = MiqTask.where(:id => task_ids) tasks.each do |t| diff --git a/app/models/metric/ci_mixin/rollup.rb b/app/models/metric/ci_mixin/rollup.rb index 723b73b17fd..6ce6cbdde23 100644 --- a/app/models/metric/ci_mixin/rollup.rb +++ b/app/models/metric/ci_mixin/rollup.rb @@ -111,11 +111,15 @@ def perf_rollup(time, interval_name, time_profile = nil) _log.info("#{log_header}...Complete - Timings: #{t.inspect}") end + def perf_rollup_range_cb(start_time, end_time, interval_name, time_profile, _status, _message, _result) + perf_rollup_range(start_time, end_time, interval_name, time_profile) + end + def perf_rollup_range(start_time, end_time, interval_name, time_profile = nil) times = case interval_name when 'realtime' Metric::Helper.realtime_timestamps_from_range(start_time, end_time) - when 'hourly' + when 'hourly', 'historical' Metric::Helper.hours_from_range(start_time, end_time) when 'daily' raise ArgumentError, _("time_profile must be passed if interval name is 'daily'") if 
time_profile.nil? diff --git a/app/models/miq_schedule_worker/jobs.rb b/app/models/miq_schedule_worker/jobs.rb index 39aa972abe2..18942fffacc 100644 --- a/app/models/miq_schedule_worker/jobs.rb +++ b/app/models/miq_schedule_worker/jobs.rb @@ -68,14 +68,8 @@ def storage_scan_timer def metric_capture_perf_capture_timer zone = MiqServer.my_server(true).zone - if zone.role_active?("ems_metrics_coordinator") - queue_work( - :class_name => "Metric::Capture", - :method_name => "perf_capture_timer", - :role => "ems_metrics_coordinator", - :priority => MiqQueue::HIGH_PRIORITY, - :state => ["ready", "dequeue"] - ) + if zone.role_active?("ems_metrics_collector") + Metric::Capture.perf_collect_all_metrics_queue(zone.ext_management_systems, "realtime") end end diff --git a/config/settings.yml b/config/settings.yml index 8eee01e3b05..f7b85d4d9d5 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -921,15 +921,6 @@ - 1.pool.ntp.org - 2.pool.ntp.org :performance: - :capture_threshold: - :default: 10.minutes - :ems_cluster: 50.minutes - :host: 50.minutes - :storage: 60.minutes - :vm: 50.minutes - :container: 50.minutes - :container_group: 50.minutes - :container_node: 50.minutes :concurrent_requests: :historical: 1 :hourly: 1 diff --git a/spec/models/metric/capture_spec.rb b/spec/models/metric/capture_spec.rb index 9768079f2c2..de88da77f88 100644 --- a/spec/models/metric/capture_spec.rb +++ b/spec/models/metric/capture_spec.rb @@ -1,74 +1,89 @@ describe Metric::Capture do - describe ".standard_capture_threshold" do - let(:host) { FactoryGirl.build(:host_vmware) } + let(:zone) { FactoryGirl.create(:zone) } + let(:ems) { FactoryGirl.create(:ems_vmware) } + let(:ems2) { FactoryGirl.create(:ems_vmware, :zone => zone) } + let(:ems3) { FactoryGirl.create(:ems_vmware, :zone => zone) } - it "parses fixed num" do - stub_performance_settings(:capture_threshold => {:host => 4}) - Timecop.freeze(Time.now.utc) do - expect(described_class.standard_capture_threshold(host)).to eq 
4.minutes.ago.utc - end - end - - it "parses string" do - stub_performance_settings(:capture_threshold => {:host => "4.minutes"}) - Timecop.freeze(Time.now.utc) do - expect(described_class.standard_capture_threshold(host)).to eq 4.minutes.ago.utc - end - end + describe ".perf_collect_all_metrics_queue" do + it "submits with options" do + expect_queued( + :class_name => "Metric::Capture", + :method_name => "perf_collect_all_metrics", + :args => [ems.id, "realtime", nil, nil, :exclude_storage => true], + :zone => ems.zone_name + ) - it "produces default with class not found" do - stub_performance_settings(:capture_threshold => {:vm => "4.minutes", - :default => "10.minutes"}) - Timecop.freeze(Time.now.utc) do - expect(described_class.standard_capture_threshold(host)).to eq 10.minutes.ago.utc - end + Metric::Capture.perf_collect_all_metrics_queue([ems], "realtime", nil, nil, :exclude_storage => true) end - end - context ".perf_capture_health_check" do - let(:miq_server) { EvmSpecHelper.local_miq_server } - let(:ems) { FactoryGirl.create(:ems_vmware, :zone => miq_server.zone) } - let(:vm) { FactoryGirl.create(:vm_perf, :ext_management_system => ems) } - let(:vm2) { FactoryGirl.create(:vm_perf, :ext_management_system => ems) } + it "submits multiple ems" do + ems2 # other ems is not used - it "should queue up realtime capture for vm" do - vm.perf_capture_realtime_now - vm2.perf_capture_realtime_now - expect(MiqQueue.count).to eq(2) + expect_queued( + :class_name => "Metric::Capture", + :method_name => "perf_collect_all_metrics", + :args => [ems.id, "realtime", nil, nil, {}], + ) + expect_queued( + :class_name => "Metric::Capture", + :method_name => "perf_collect_all_metrics", + :args => [ems2.id, "realtime", nil, nil, {}], + ) - expect(Metric::Capture._log).to receive(:info).with(/2 "realtime" captures on the queue.*oldest:.*recent:/) - expect(Metric::Capture._log).to receive(:info).with(/0 "hourly" captures on the queue/) - expect(Metric::Capture._log).to 
receive(:info).with(/0 "historical" captures on the queue/) - described_class.send(:perf_capture_health_check, miq_server.zone) + Metric::Capture.perf_collect_all_metrics_queue([ems, ems2], "realtime") end end - describe ".perf_capture_now?" do - before do - stub_performance_settings(:capture_threshold => {:host => "10.minutes"}) - end - - let(:target) { FactoryGirl.build(:host_vmware) } + describe ".perf_capture_gap_queue" do + context "with default zone" do + it "queues ems" do + EvmSpecHelper.local_miq_server + ems # default zone is used + ems2 + ems3 - context "with an alertless host" do - it "captures if the target has never been captured" do - target.last_perf_capture_on = nil - expect(described_class.perf_capture_now?(target)).to eq(true) + expect_queued( + :class_name => "Metric::Capture", + :method_name => "perf_collect_all_metrics", + :args => [ems.id, "historical", nil, nil, :exclude_storages => true], + ) + Metric::Capture.perf_capture_gap_queue(nil, nil) end + end + + context "with a zone.id" do + it "queues multiple ems" do + ems + ems2 # zone declared is used + ems3 - it "does not captures if the target has been captured very recently" do - target.last_perf_capture_on = 1.minute.ago - expect(described_class.perf_capture_now?(target)).to eq(false) + expect_queued( + :class_name => "Metric::Capture", + :method_name => "perf_collect_all_metrics", + :args => [ems2.id, "historical", nil, nil, :exclude_storages => true], + ) + expect_queued( + :class_name => "Metric::Capture", + :method_name => "perf_collect_all_metrics", + :args => [ems3.id, "historical", nil, nil, :exclude_storages => true], + ) + Metric::Capture.perf_capture_gap_queue(nil, nil, zone.id) end + end - it "captures if the target hasn't been captured in a long while" do - target.last_perf_capture_on = 15.minutes.ago - expect(described_class.perf_capture_now?(target)).to eq(true) + context "with an ems" do + it "queues an ems" do + expect_queued( + :class_name => "Metric::Capture", + 
:method_name => "perf_collect_all_metrics", + :args => [ems.id, "historical", nil, nil, :exclude_storages => true], + ) + Metric::Capture.perf_capture_gap_queue(nil, nil, ems) end end end - def stub_performance_settings(hash) - stub_settings(:performance => hash) + def expect_queued(args) + expect(MiqQueue).to receive(:put_unless_exists).with(hash_including(args)) end end diff --git a/spec/models/metric/ci_mixin/capture_spec.rb b/spec/models/metric/ci_mixin/capture_spec.rb index c0711a91976..79222dd3f9d 100644 --- a/spec/models/metric/ci_mixin/capture_spec.rb +++ b/spec/models/metric/ci_mixin/capture_spec.rb @@ -316,45 +316,6 @@ def verify_perf_capture_queue(last_perf_capture_on, total_queue_items) end end - it "when last_perf_capture_on is nil (first time)" do - MiqQueue.delete_all - Timecop.freeze do - Timecop.travel(Time.now.end_of_day - 6.hours) - verify_perf_capture_queue(nil, 1) - Timecop.travel(Time.now + 20.minutes) - verify_perf_capture_queue(nil, 1) - end - end - - it "when last_perf_capture_on is very old (older than the realtime_cut_off of 4.hours.ago)" do - MiqQueue.delete_all - Timecop.freeze do - Timecop.travel(Time.now.end_of_day - 6.hours) - verify_perf_capture_queue((10.days + 5.hours + 23.minutes).ago, 11) - end - end - - it "when last_perf_capture_on is recent (before the realtime_cut_off of 4.hours.ago)" do - MiqQueue.delete_all - Timecop.freeze do - Timecop.travel(Time.now.end_of_day - 6.hours) - verify_perf_capture_queue((0.days + 2.hours + 5.minutes).ago, 1) - end - end - - it "is able to handle multiple attempts to queue perf_captures and not add new items" do - MiqQueue.delete_all - Timecop.freeze do - # set a specific time of day to avoid sporadic test failures that fall on the exact right time to bump the - # queue items to 12 instead of 11 - current_time = Time.now.end_of_day - 6.hours - Timecop.travel(current_time) - last_perf_capture_on = (10.days + 5.hours + 23.minutes).ago - verify_perf_capture_queue(last_perf_capture_on, 11) - 
Timecop.travel(current_time + 20.minutes) - verify_perf_capture_queue(last_perf_capture_on, 11) - end - end end describe "#perf_capture_queue('historical')" do @@ -365,72 +326,9 @@ def verify_perf_capture_queue_historical(last_perf_capture_on, total_queue_items expect(MiqQueue.count).to eq total_queue_items end - it "when last_perf_capture_on is nil(first time)" do - MiqQueue.delete_all - Timecop.freeze do - allow(Metric::Capture).to receive(:historical_days).and_return(7) - current_time = Time.now.end_of_day - 6.hours - Timecop.travel(current_time) - verify_perf_capture_queue_historical(nil, 8) - Timecop.travel(current_time + 20.minutes) - verify_perf_capture_queue_historical(nil, 8) - end - end - - it "when last_perf_capture_on is very old" do - MiqQueue.delete_all - Timecop.freeze do - # set a specific time of day to avoid sporadic test failures that fall on the exact right time to bump the - # queue items to 12 instead of 11 - allow(Metric::Capture).to receive(:historical_days).and_return(7) - current_time = Time.now.end_of_day - 6.hours - last_capture_on = (10.days + 5.hours + 23.minutes).ago - Timecop.travel(current_time) - verify_perf_capture_queue_historical(last_capture_on, 8) - Timecop.travel(current_time + 20.minutes) - verify_perf_capture_queue_historical(last_capture_on, 8) - end - end - - it "when last_perf_capture_on is fairly recent" do - MiqQueue.delete_all - Timecop.freeze do - # set a specific time of day to avoid sporadic test failures that fall on the exact right time to bump the - # queue items to 12 instead of 11 - allow(Metric::Capture).to receive(:historical_days).and_return(7) - current_time = Time.now.end_of_day - 6.hours - last_capture_on = (10.days + 5.hours + 23.minutes).ago - Timecop.travel(current_time) - verify_perf_capture_queue_historical(last_capture_on, 8) - Timecop.travel(current_time + 20.minutes) - verify_perf_capture_queue_historical(last_capture_on, 8) - end - end end end context "handles archived container entities" do - 
it "get the correct queue name and zone from archived container entities" do - ems = FactoryGirl.create(:ems_openshift, :name => 'OpenShiftProvider') - group = FactoryGirl.create(:container_group, :name => "group", :ext_management_system => ems) - container = FactoryGirl.create(:container, - :name => "container", - :container_group => group, - :ext_management_system => ems) - project = FactoryGirl.create(:container_project, - :name => "project", - :ext_management_system => ems) - container.disconnect_inv - group.disconnect_inv - project.disconnect_inv - - expect(container.ems_for_capture_target).to eq ems - expect(group.ems_for_capture_target).to eq ems - expect(project.ems_for_capture_target).to eq ems - - expect(container.my_zone).to eq ems.my_zone - expect(group.my_zone).to eq ems.my_zone - expect(project.my_zone).to eq ems.my_zone - end end end diff --git a/spec/models/metric_spec.rb b/spec/models/metric_spec.rb index 3f962f337ae..46bd93ea6f8 100644 --- a/spec/models/metric_spec.rb +++ b/spec/models/metric_spec.rb @@ -1,8 +1,7 @@ describe Metric do before(:each) do MiqRegion.seed - - _guid, _server, @zone = EvmSpecHelper.create_guid_miq_server_zone + @zone = EvmSpecHelper.local_miq_server.zone end context "as vmware" do @@ -10,233 +9,6 @@ @ems_vmware = FactoryGirl.create(:ems_vmware, :zone => @zone) end - context "with enabled and disabled targets" do - before(:each) do - storages = [] - 2.times { storages << FactoryGirl.create(:storage_target_vmware) } - - @vmware_clusters = [] - 2.times do - cluster = FactoryGirl.create(:cluster_target) - @vmware_clusters << cluster - @ems_vmware.ems_clusters << cluster - end - - 6.times do |n| - host = FactoryGirl.create(:host_target_vmware, :ext_management_system => @ems_vmware) - @ems_vmware.hosts << host - - @vmware_clusters[n / 2].hosts << host if n < 4 - host.storages << storages[n / 3] - end - - MiqQueue.delete_all - end - - context "executing capture_targets" do - it "should find enabled targets" do - targets = 
Metric::Targets.capture_targets - assert_infra_targets_enabled targets, %w(ManageIQ::Providers::Vmware::InfraManager::Vm ManageIQ::Providers::Vmware::InfraManager::Host ManageIQ::Providers::Vmware::InfraManager::Host ManageIQ::Providers::Vmware::InfraManager::Vm ManageIQ::Providers::Vmware::InfraManager::Host Storage) - end - - it "should find enabled targets excluding storages" do - targets = Metric::Targets.capture_targets(nil, :exclude_storages => true) - assert_infra_targets_enabled targets, %w(ManageIQ::Providers::Vmware::InfraManager::Vm ManageIQ::Providers::Vmware::InfraManager::Host ManageIQ::Providers::Vmware::InfraManager::Host ManageIQ::Providers::Vmware::InfraManager::Vm ManageIQ::Providers::Vmware::InfraManager::Host) - end - end - - context "executing perf_capture_timer" do - before(:each) do - stub_settings_merge(:performance => {:history => {:initial_capture_days => 7}}) - Metric::Capture.perf_capture_timer - end - - let(:expected_queue_items) do - { - %w(ManageIQ::Providers::Vmware::InfraManager::Host perf_capture_realtime) => 3, - %w(ManageIQ::Providers::Vmware::InfraManager::Host perf_capture_historical) => 24, - %w(Storage perf_capture_hourly) => 1, - %w(ManageIQ::Providers::Vmware::InfraManager::Vm perf_capture_realtime) => 2, - %w(ManageIQ::Providers::Vmware::InfraManager::Vm perf_capture_historical) => 16, - %w(MiqTask destroy_older_by_condition) => 1 - } - end - - it "should queue up enabled targets" do - expect(MiqQueue.group(:class_name, :method_name).count).to eq(expected_queue_items) - assert_metric_targets - end - - context "executing capture_targets for realtime targets with parent objects" do - before(:each) do - @expected_targets = Metric::Targets.capture_targets - end - - it "should create tasks and queue callbacks" do - @vmware_clusters.each do |cluster| - expected_hosts = cluster.hosts.select { |h| @expected_targets.include?(h) } - next if expected_hosts.empty? 
- - task = MiqTask.find_by(:name => "Performance rollup for EmsCluster:#{cluster.id}") - expect(task).not_to be_nil - expect(task.context_data[:targets]).to match_array(cluster.hosts.collect { |h| "ManageIQ::Providers::Vmware::InfraManager::Host:#{h.id}" }) - - expected_hosts.each do |host| - messages = MiqQueue.where(:class_name => 'ManageIQ::Providers::Vmware::InfraManager::Host', - :instance_id => host.id, - :method_name => "perf_capture_realtime") - expect(messages.size).to eq(1) - messages.each do |m| - expect(m.miq_callback).not_to be_nil - expect(m.miq_callback[:method_name]).to eq(:perf_capture_callback) - expect(m.miq_callback[:args]).to eq([[task.id]]) - - m.delivered("ok", "Message delivered successfully", nil) - end - end - - task.reload - expect(task.state).to eq("Finished") - - message = MiqQueue.find_by(:method_name => "perf_rollup_range", :class_name => "EmsCluster", :instance_id => cluster.id) - expect(message).not_to be_nil - expect(message.args).to eq([task.context_data[:start], task.context_data[:end], task.context_data[:interval], nil]) - end - end - - it "calling perf_capture_timer when existing capture messages are on the queue should merge messages and append new task id to cb args" do - Metric::Capture.perf_capture_timer - @vmware_clusters.each do |cluster| - expected_hosts = cluster.hosts.select { |h| @expected_targets.include?(h) } - next if expected_hosts.empty? 
- - tasks = MiqTask.where(:name => "Performance rollup for EmsCluster:#{cluster.id}").order("id DESC") - expect(tasks.length).to eq(2) - tasks.each do |task| - expect(task.context_data[:targets]).to match_array(cluster.hosts.collect { |h| "ManageIQ::Providers::Vmware::InfraManager::Host:#{h.id}" }) - end - - task_ids = tasks.collect(&:id) - - expected_hosts.each do |host| - messages = MiqQueue.where(:class_name => 'ManageIQ::Providers::Vmware::InfraManager::Host', - :instance_id => host.id, - :method_name => "perf_capture_realtime") - expect(messages.size).to eq(1) - host.update_attribute(:last_perf_capture_on, 1.minute.from_now.utc) - messages.each do |m| - next if m.miq_callback[:args].blank? - - expect(m.miq_callback).not_to be_nil - expect(m.miq_callback[:method_name]).to eq(:perf_capture_callback) - expect(m.miq_callback[:args].first.sort).to eq(task_ids.sort) - - status, message, result = m.deliver - m.delivered(status, message, result) - end - end - - tasks.each do |task| - task.reload - expect(task.state).to eq("Finished") - end - end - end - - it "calling perf_capture_timer when existing capture messages are on the queue in dequeue state should NOT merge" do - messages = MiqQueue.where(:class_name => "Host", :method_name => 'capture_metrics_realtime') - messages.each { |m| m.update_attribute(:state, "dequeue") } - - Metric::Capture.perf_capture_timer - - messages = MiqQueue.where(:class_name => "Host", :method_name => 'capture_metrics_realtime') - messages.each { |m| expect(m.lock_version).to eq(1) } - end - - it "calling perf_capture_timer a second time should create another task with the correct time window" do - Metric::Capture.perf_capture_timer - - @vmware_clusters.each do |cluster| - expected_hosts = cluster.hosts.select { |h| @expected_targets.include?(h) } - next if expected_hosts.empty? 
- - tasks = MiqTask.where(:name => "Performance rollup for EmsCluster:#{cluster.id}").order("id") - expect(tasks.length).to eq(2) - - t1, t2 = tasks - expect(t2.context_data[:start]).to eq(t1.context_data[:end]) - end - end - end - end - - context "executing perf_capture_gap" do - before(:each) do - t = Time.now.utc - Metric::Capture.perf_capture_gap(t - 7.days, t - 5.days) - end - - it "should queue up enabled targets for historical" do - expect(MiqQueue.count).to eq(10) - - expected_targets = Metric::Targets.capture_targets(nil, :exclude_storages => true) - expected = expected_targets.flat_map { |t| [[t, "historical"]] * 2 } # Vm, Host, Host, Vm, Host - - selected = queue_intervals(MiqQueue.all) - - expect(selected).to match_array(expected) - end - end - - context "executing perf_capture_realtime_now" do - before(:each) do - @vm = Vm.first - @vm.perf_capture_realtime_now - end - - it "should queue up realtime capture for vm" do - expect(MiqQueue.count).to eq(1) - - msg = MiqQueue.first - expect(msg.priority).to eq(MiqQueue::HIGH_PRIORITY) - expect(msg.instance_id).to eq(@vm.id) - expect(msg.class_name).to eq("ManageIQ::Providers::Vmware::InfraManager::Vm") - end - - context "with an existing queue item at a lower priority" do - before(:each) do - MiqQueue.first.update_attribute(:priority, MiqQueue::NORMAL_PRIORITY) - @vm.perf_capture_realtime_now - end - - it "should raise the priority of the existing queue item" do - expect(MiqQueue.count).to eq(1) - - msg = MiqQueue.first - expect(msg.priority).to eq(MiqQueue::HIGH_PRIORITY) - expect(msg.instance_id).to eq(@vm.id) - expect(msg.class_name).to eq("ManageIQ::Providers::Vmware::InfraManager::Vm") - end - end - - context "with an existing queue item at a higher priority" do - before(:each) do - MiqQueue.first.update_attribute(:priority, MiqQueue::MAX_PRIORITY) - @vm.perf_capture_realtime_now - end - - it "should not lower the priority of the existing queue item" do - expect(MiqQueue.count).to eq(1) - - msg = 
MiqQueue.first - expect(msg.priority).to eq(MiqQueue::MAX_PRIORITY) - expect(msg.instance_id).to eq(@vm.id) - expect(msg.class_name).to eq("ManageIQ::Providers::Vmware::InfraManager::Vm") - end - end - end - end - context "with a vm" do before(:each) do @vm = FactoryGirl.create(:vm_perf, :ext_management_system => @ems_vmware) @@ -267,16 +39,6 @@ end end - context "executing perf_capture_now?" do - before(:each) do - stub_settings(:performance => {:capture_threshold => {:vm => 10}}) - end - - it "without alerts assigned" do - assert_perf_capture_now @vm - end - end - context "services" do let(:service) { FactoryGirl.create(:service) } @@ -989,44 +751,6 @@ @ems_openstack = FactoryGirl.create(:ems_openstack, :zone => @zone) end - context "with enabled and disabled targets" do - before(:each) do - @availability_zone = FactoryGirl.create(:availability_zone_target) - @ems_openstack.availability_zones << @availability_zone - @vms_in_az = [] - 2.times { @vms_in_az << FactoryGirl.create(:vm_openstack, :ems_id => @ems_openstack.id) } - @availability_zone.vms = @vms_in_az - @availability_zone.vms.push(FactoryGirl.create(:vm_openstack, :ems_id => nil)) - - @vms_not_in_az = [] - 3.times { @vms_not_in_az << FactoryGirl.create(:vm_openstack, :ems_id => @ems_openstack.id) } - - MiqQueue.delete_all - end - - context "executing capture_targets" do - it "should find enabled targets" do - targets = Metric::Targets.capture_targets - assert_cloud_targets_enabled targets, %w(ManageIQ::Providers::Openstack::CloudManager::Vm ManageIQ::Providers::Openstack::CloudManager::Vm ManageIQ::Providers::Openstack::CloudManager::Vm ManageIQ::Providers::Openstack::CloudManager::Vm ManageIQ::Providers::Openstack::CloudManager::Vm) - end - end - - context "executing perf_capture_timer" do - before(:each) do - stub_settings(:performance => {:history => {:initial_capture_days => 7}}) - Metric::Capture.perf_capture_timer - end - - it "should queue up enabled targets" do - expected_targets = 
Metric::Targets.capture_targets - expect(MiqQueue.group(:method_name).count).to eq('perf_capture_realtime' => expected_targets.count, - 'perf_capture_historical' => expected_targets.count * 8, - 'destroy_older_by_condition' => 1) - assert_metric_targets(expected_targets) - end - end - end - context "with a vm" do before(:each) do @vm = FactoryGirl.create(:vm_perf_openstack, :ext_management_system => @ems_openstack) @@ -1247,36 +971,4 @@ def assert_cloud_targets_enabled(targets, expected_types) expect(selected_types).to match_array(expected_types) end - - def assert_perf_capture_now(target) - Timecop.freeze(Time.now) do - target.update_attribute(:last_perf_capture_on, nil) - expect(Metric::Capture.perf_capture_now?(target)).to be_truthy - - target.update_attribute(:last_perf_capture_on, Time.now.utc - 15.minutes) - expect(Metric::Capture.perf_capture_now?(target)).to be_truthy - - target.update_attribute(:last_perf_capture_on, Time.now.utc - 1.minutes) - expect(Metric::Capture.perf_capture_now?(target)).not_to be_truthy - end - end - - def assert_metric_targets(expected_targets = Metric::Targets.capture_targets) - expected = expected_targets.flat_map do |t| - # Storage is hourly only - # Non-storage historical is expecting 7 days back, plus partial day = 8 - t.kind_of?(Storage) ? [[t, "hourly"]] : [[t, "realtime"]] + [[t, "historical"]] * 8 - end - selected = queue_intervals( - MiqQueue.where(:method_name => %w(perf_capture_hourly perf_capture_realtime perf_capture_historical))) - - expect(selected).to match_array(expected) - end - - def queue_intervals(items) - items.map do |q| - interval_name = q.method_name.sub("perf_capture_", "") - [Object.const_get(q.class_name).find(q.instance_id), interval_name] - end - end end