Skip to content

Commit

Permalink
Merge pull request ManageIQ#13977 from jrafanie/backport_13065_to_darga
Browse files Browse the repository at this point in the history
[DARGA] Fix master server failover race condition (backport ManageIQ#13065)
  • Loading branch information
simaishi authored Apr 20, 2017
2 parents 1392ec1 + a966d39 commit 07a82c2
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 8 deletions.
20 changes: 12 additions & 8 deletions app/models/miq_server/server_monitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,29 @@ def mark_as_not_responding(seconds = miq_server_time_threshold)
end

def make_master_server(last_master)
_log.info "Master server has #{last_master.nil? ? "not been set" : "died"}. Attempting takeover as new master server."
parent = MiqRegion.my_region
_log.info "Master server has #{last_master.nil? ? "not been set" : "died, #{last_master.name}"}. Attempting takeover as new master server, #{name}."
parent = MiqRegion.my_region(true)
parent.lock do
all_servers = parent.miq_servers
# See if an ACTIVE server has already taken over
active_servers = parent.active_miq_servers

_log.debug "Double checking that nothing has changed"
master = all_servers.detect(&:is_master?)
master = active_servers.detect(&:is_master?)
if (last_master.nil? && !master.nil?) || (!last_master.nil? && !master.nil? && last_master.id != master.id)
_log.info "Aborting master server takeover as another server has taken control first."
_log.info "Aborting master server takeover as another server, #{master.name}, has taken control first."
return nil
end

_log.debug "Setting this server as master server"
all_servers.each do |s|
_log.debug "Setting this server, #{name}, as master server"

# Set is_master on self, reset every other server in the region, including
# inactive ones.
parent.miq_servers.each do |s|
s.is_master = (id == s.id)
s.save!
end
end
_log.info "This server is now set as the master server"
_log.info "This server #{name} is now set as the master server, last_master: #{last_master.try(:name)}"
self
end

Expand Down
49 changes: 49 additions & 0 deletions spec/models/miq_server/server_monitor_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,55 @@
@miq_server3.reload
end

it "should support multiple failover transitions from stopped master" do
# server1 is first to start, becomes master
@miq_server1.monitor_servers

# Initialize the bookkeeping around current and last master
@miq_server2.monitor_servers
@miq_server3.monitor_servers

# server1 is master
expect(@miq_server1.reload.is_master).to be_truthy
expect(@miq_server2.reload.is_master).to be_falsey
expect(@miq_server3.reload.is_master).to be_falsey

# server 1 shuts down
@miq_server1.update(:status => "stopped")

# server 3 becomes master, server 2 hasn't monitored servers yet
@miq_server3.monitor_servers
expect(@miq_server1.reload.is_master).to be_falsey
expect(@miq_server2.reload.is_master).to be_falsey
expect(@miq_server3.reload.is_master).to be_truthy

# server 3 shuts down
@miq_server3.update(:status => "stopped")

# server 2 finally gets to monitor_servers, takes over
@miq_server2.monitor_servers
expect(@miq_server1.reload.is_master).to be_falsey
expect(@miq_server2.reload.is_master).to be_truthy
expect(@miq_server3.reload.is_master).to be_falsey
end

it "should failover from stopped master on startup" do
# server 1 is first to start, becomes master
@miq_server1.monitor_servers

# server 1 shuts down
@miq_server1.update(:status => "stopped")

# server 3 boots and hasn't run monitor_servers yet
expect(@miq_server1.reload.is_master).to be_truthy
expect(@miq_server3.reload.is_master).to be_falsey

# server 3 runs monitor_servers and becomes master
@miq_server3.monitor_servers
expect(@miq_server1.reload.is_master).to be_falsey
expect(@miq_server3.reload.is_master).to be_truthy
end

it "should have all roles active after sync between them" do
expect(@miq_server1.active_role_names.include?("ems_operations")).to be_truthy
expect(@miq_server2.active_role_names.include?("ems_operations")).to be_truthy
Expand Down

0 comments on commit 07a82c2

Please sign in to comment.