Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check for timed out active tasks #15231

Merged
merged 6 commits into from
May 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions app/models/miq_schedule_worker/jobs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ def generate_chargeback_for_service(args = {})
queue_work(:class_name => "Service", :method_name => "queue_chargeback_reports", :zone => nil, :args => args)
end

def check_for_timed_out_active_tasks
queue_work(:class_name => "MiqTask", :method_name => "update_status_for_timed_out_active_tasks", :zone => nil)
end

private

def queue_work(options)
Expand Down
13 changes: 12 additions & 1 deletion app/models/miq_schedule_worker/runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,22 @@ def schedules_for_scheduler_role
end
end

# run run chargeback generation every day at specific time
# run chargeback generation every day at specific time
schedule_chargeback_report_for_service_daily

schedule_check_for_task_timeout

@schedules[:scheduler]
end

def schedule_check_for_task_timeout
every = worker_settings[:task_timeout_check_frequency]
scheduler = scheduler_for(:scheduler)
scheduler.schedule_every(every, :first_at => Time.current + 1.minute) do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yrudman have you tested that rufus supports ActiveSupport::TimeWithZone?

irb(main):003:0> Time.current.class
=> ActiveSupport::TimeWithZone

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i've done manual testing: leaving active records in DB and restarting server - works

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

>> require 'rufus-scheduler'
>> s = Rufus::Scheduler.new
>> n = Time.current
=> Fri, 26 May 2017 15:50:17 UTC +00:00
>> p [ :scheduled_at, n, n.to_f ]
[:scheduled_at, Fri, 26 May 2017 15:50:17 UTC +00:00, 1495813817.563702]
=> [:scheduled_at, Fri, 26 May 2017 15:50:17 UTC +00:00, 1495813817.563702]

enqueue :check_for_timed_out_active_tasks
end
end

def schedule_chargeback_report_for_service_daily
every = worker_settings[:chargeback_generation_interval]
at = worker_settings[:chargeback_generation_time_utc]
Expand Down
11 changes: 11 additions & 0 deletions app/models/miq_task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ class MiqTask < ApplicationRecord
t.grouping(Arel::Nodes::Case.new(t[:state]).when(STATE_FINISHED).then(t[:status]).else(t[:state]))
end)

scope :active, -> { where(:state => STATE_ACTIVE) }
scope :no_associated_job, -> { where.not("id IN (SELECT miq_task_id from jobs)") }
scope :timed_out, -> { where("updated_on < ?", Time.now.utc - ::Settings.task.active_task_timeout.to_i_with_method) }

def self.update_status_for_timed_out_active_tasks
MiqTask.active.timed_out.no_associated_job.find_each do |task|
task.update_status(STATE_FINISHED, STATUS_ERROR,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still confused by the STATUS_ERROR. We should fix this in a followup so the UI displays timed out tasks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, i will look at it

"Task [#{task.id}] timed out - not active for more than #{::Settings.task.active_task_timeout}")
end
end

def active?
![STATE_QUEUED, STATE_FINISHED].include?(state)
end
Expand Down
3 changes: 3 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,8 @@
:max_parallel_scans_per_host: 1
:max_qitems_per_scan_request: 0
:watchdog_interval: 1.minute
:task:
:active_task_timeout: 6.hours
:ui:
:mark_translated_strings: false
:webservices:
Expand Down Expand Up @@ -1299,6 +1301,7 @@
:session_timeout_interval: 30.seconds
:storage_file_collection_interval: 1.days
:storage_file_collection_time_utc: 21600
:task_timeout_check_frequency: 1.hour
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what's are the correct numbers here...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

me too ❓

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1.hour here seems reasonable. We don't want to be checking too often but, we also don't want to have tasks sitting in an active state for an extended period of time when they are already dead.

:vm_retired_interval: 10.minutes
:yum_update_check: 12.hours
:ui_worker:
Expand Down
11 changes: 11 additions & 0 deletions spec/models/miq_schedule_worker/jobs_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,15 @@
)
end
end

describe "#check_for_timed_out_active_tasks" do
it "enqueues update_status_for_timed_out_active_tasks" do
allow(MiqServer).to receive(:my_zone)
described_class.new.check_for_timed_out_active_tasks
expect(MiqQueue.first).to have_attributes(
:class_name => "MiqTask",
:method_name => "update_status_for_timed_out_active_tasks"
)
end
end
end
25 changes: 23 additions & 2 deletions spec/models/miq_schedule_worker/runner_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -460,15 +460,18 @@
end
end

context "Chargeback reports for Services" do
context "schedule for 'scheduler' role" do
before do
allow(@schedule_worker).to receive(:heartbeat)
@schedule_worker.instance_variable_set(:@active_roles, ["scheduler"])
allow(@schedule_worker).to receive(:worker_settings).and_return(:chargeback_generation_interval => 1.day)
@schedule_worker.instance_variable_set(:@schedules, :scheduler => [])
end

describe "#schedule_chargeback_report_for_service_daily" do
before do
allow(@schedule_worker).to receive(:worker_settings).and_return(:chargeback_generation_interval => 1.day)
end

it "queues daily generation of Chargeback report for each service" do
job = @schedule_worker.schedule_chargeback_report_for_service_daily[0]
expect(job).to be_kind_of(Rufus::Scheduler::EveryJob)
Expand All @@ -484,6 +487,24 @@
job.unschedule
end
end

describe "#schedule_check_for_task_timeout" do
let(:interval) { 1.hour }
before do
allow(@schedule_worker).to receive(:worker_settings).and_return(:task_timeout_check_frequency => interval)
end

it "queues check for timed out tasks" do
job = @schedule_worker.schedule_check_for_task_timeout[0]
job.call
@schedule_worker.do_work
queue = MiqQueue.first
expect(queue.method_name).to eq "update_status_for_timed_out_active_tasks"
expect(queue.class_name).to eq "MiqTask"
MiqQueue.delete_all
job.unschedule
end
end
end
end

Expand Down
64 changes: 64 additions & 0 deletions spec/models/miq_task_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -381,4 +381,68 @@
expect(miq_task.miq_server.id).to eq server.id
end
end

describe ".update_status_for_timed_out_active_tasks" do
let(:timeout) { "1.hour" }
before do
stub_settings(:task => {:active_task_timeout => timeout})
end

context "task does not linked to job" do
let(:miq_task) { FactoryGirl.create(:miq_task_plain) }

context "task is active" do
before do
miq_task.update_attributes(:state => MiqTask::STATE_ACTIVE)
end

it "updates status to 'Error' for timed out task" do
miq_task.update_attributes(:updated_on => miq_task.updated_on - timeout.to_i_with_method)
expect(miq_task.status).not_to eq MiqTask::STATUS_ERROR
MiqTask.update_status_for_timed_out_active_tasks
miq_task.reload
expect(miq_task.status).to eq MiqTask::STATUS_ERROR
end

it "does not update status if task not timed out" do
MiqTask.update_status_for_timed_out_active_tasks
miq_task.reload
expect(miq_task.status).not_to eq MiqTask::STATUS_ERROR
end
end

context "task is not active" do
it "does not update status to 'Error' if task state is 'Finished'" do
miq_task.update_attributes(:state => MiqTask::STATE_FINISHED,
:updated_on => miq_task.updated_on - timeout.to_i_with_method)
MiqTask.update_status_for_timed_out_active_tasks
miq_task.reload
expect(miq_task.status).not_to eq MiqTask::STATUS_ERROR
end

it "does not update status to 'Error' if task state is 'Queued'" do
miq_task.update_attributes(:state => MiqTask::STATE_QUEUED,
:updated_on => miq_task.updated_on - timeout.to_i_with_method)
MiqTask.update_status_for_timed_out_active_tasks
miq_task.reload
expect(miq_task.status).not_to eq MiqTask::STATUS_ERROR
end
end
end

context "task linked to job" do
let(:miq_task) do
job = Job.create_job("VmScan")
job.miq_task
end

it "does not update status to 'Error'" do
miq_task.update_attributes(:state => MiqTask::STATE_ACTIVE,
:updated_on => miq_task.updated_on - timeout.to_i_with_method)
MiqTask.update_status_for_timed_out_active_tasks
miq_task.reload
expect(miq_task.status).not_to eq MiqTask::STATUS_ERROR
end
end
end
end