Skip to content

Commit

Permalink
Retry in sync_dns_scheduler if migration not finished
Browse files Browse the repository at this point in the history
We have to ensure that the migration has finshed succesfully in the
sync_dns_scheduler, before we can load the db Models. Otherwise the
sync-dns job gets into an inconsistent state since the db is still being
migrated or DB being upgraded.

[#164325756](https://www.pivotaltracker.com/story/show/164325756)

Co-authored-by: Sebastian Heid <[email protected]>
Co-authored-by: Vladimir Videlov <[email protected]>
Co-authored-by: Denis Langer <[email protected]>
  • Loading branch information
4 people committed Mar 12, 2019
1 parent 78fee5f commit 1f9ca8b
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 38 deletions.
15 changes: 6 additions & 9 deletions src/bosh-director/bin/bosh-director-sync-dns
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env ruby

require 'bosh/director'
require 'erb'
require 'optparse'
require 'bosh/director/config'
require 'bosh/director/sync_dns_scheduler'
require 'bosh/director/agent_broadcaster'
require 'bosh/director/dns/dns_version_converger'
Expand All @@ -17,15 +19,10 @@ opts.parse!(ARGV.dup)

config_file ||= ::File.expand_path('../../config/bosh-director.yml', __FILE__)
config = Bosh::Director::Config.load_file(config_file)
config.db

Bosh::Director::App.new(config)

dns_converger = Bosh::Director::DnsVersionConverger.new(
Bosh::Director::AgentBroadcaster.new,
Bosh::Director::Config.logger,
Bosh::Director::Config.max_threads
)
dns_sync_broadcaster = Bosh::Director::SyncDnsScheduler.new(dns_converger, 10)
dns_sync_broadcaster = Bosh::Director::SyncDnsScheduler.new(config, 10)
dns_sync_broadcaster.prep

%w(TERM INT QUIT).each do |signal|
trap(signal) do
Expand Down
8 changes: 8 additions & 0 deletions src/bosh-director/lib/bosh/director/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,14 @@ def worker_logger
logger
end

def sync_dns_scheduler_logger
logger = Logging::Logger.new('SyncDnsScheduler')
logging_config = hash.fetch('logging', {})
logger.add_appenders(Logging.appenders.stdout('SyncDnsSchedulerIO', layout: ThreadFormatter.layout))
logger.level = Logging.levelify(logging_config.fetch('level', 'debug'))
logger
end

def db
Config.configure_db(hash['db'])
end
Expand Down
38 changes: 36 additions & 2 deletions src/bosh-director/lib/bosh/director/sync_dns_scheduler.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
require 'db_migrator'
require 'optparse'
require 'bosh/director/config'

module Bosh::Director
class SyncDnsScheduler
def initialize(dns_version_converger, interval)
@dns_version_converger = dns_version_converger
def initialize(config, interval)
@config = config
@interval = interval
end

def prep
ensure_migrations

require 'bosh/director'
Bosh::Director::App.new(@config)

@dns_version_converger = Bosh::Director::DnsVersionConverger.new(
Bosh::Director::AgentBroadcaster.new,
Bosh::Director::Config.logger,
Bosh::Director::Config.max_threads,
)
end

def start!
@thread = Thread.new do
loop do
Expand All @@ -23,6 +40,23 @@ def stop!

private

def ensure_migrations
if defined?(Bosh::Director::Models)
raise 'Bosh::Director::Models were loaded before ensuring migrations are current. '\
'Cowardly refusing to start sync dns scheduler.'
end

migrator = DBMigrator.new(@config.db, :director)
raise_migration_error unless migrator.finished?
end

def raise_migration_error
@config.sync_dns_scheduler_logger.error(
"Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts.",
)
raise "Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries"
end

def broadcast
@dns_version_converger.update_instances_based_on_strategy
end
Expand Down
19 changes: 6 additions & 13 deletions src/bosh-director/lib/bosh/director/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@
module Bosh
module Director
class Worker
MAX_MIGRATION_ATTEMPTS = 50

def initialize(config, index = 0, retry_interval = 0.5)
def initialize(config, index = 0)
@config = config
@index = index
@retry_interval = retry_interval
end

def prep
Expand Down Expand Up @@ -64,16 +61,12 @@ def ensure_migrations
raise 'Bosh::Director::Models were loaded before ensuring migrations are current. Cowardly refusing to start worker.'
end
migrator = DBMigrator.new(@config.db, :director)
tries = 0
until migrator.current?
tries += 1
sleep @retry_interval
if tries >= MAX_MIGRATION_ATTEMPTS
@config.worker_logger.error("Migrations not current during worker start after #{tries} attempts.")
raise "Migrations not current after #{MAX_MIGRATION_ATTEMPTS} retries"
end
unless migrator.finished?
@config.worker_logger.error(
"Migrations not current during worker start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts.",
)
raise "Migrations not current after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries"
end

require 'bosh/director'
end

Expand Down
15 changes: 14 additions & 1 deletion src/bosh-director/lib/db_migrator.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
require 'sequel'

class DBMigrator
def initialize(database, type, options = {})
MAX_MIGRATION_ATTEMPTS = 50

def initialize(database, type, options = {}, retry_interval = 0.5)
@migrator = case type
when :cpi then cpi_migrator(database, options)
when :director then director_migrator(database, options)
when :dns then dns_migrator(database, options)
end
@retry_interval = retry_interval
end

def current?
Expand All @@ -17,6 +20,16 @@ def migrate
@migrator&.run
end

def finished?
tries = 0
until current?
tries += 1
sleep @retry_interval
return false if tries >= MAX_MIGRATION_ATTEMPTS
end
true
end

private

def migrator(database, directory, options)
Expand Down
25 changes: 25 additions & 0 deletions src/bosh-director/spec/unit/db_migrator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
require 'spec_helper'
require 'db_migrator'

module Bosh::Director
describe 'DBMigrator' do
subject(:migrator) { DBMigrator.new('db', 'test', {}, 0.01) }

describe '#finished?' do
it 'returns true if migration is already current' do
allow(migrator).to receive(:current?).once.and_return(true)
expect(migrator.finished?).to be(true)
end

it 'return true if migration is current after retrying' do
allow(migrator).to receive(:current?).twice.and_return(false, true)
expect(migrator.finished?).to be(true)
end

it 'returns false if migrations are never current' do
allow(migrator).to receive(:current?).exactly(DBMigrator::MAX_MIGRATION_ATTEMPTS).times.and_return(false)
expect(migrator.finished?).to be(false)
end
end
end
end
61 changes: 61 additions & 0 deletions src/bosh-director/spec/unit/sync_dns_scheduler_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
require 'spec_helper'
require 'db_migrator'
require_relative '../../lib/bosh/director/sync_dns_scheduler'

Models = Bosh::Director::Models
module Kernel
alias original_require require
def require(path)
Bosh::Director.const_set(:Models, Models) if path == 'bosh/director' && !defined?(Bosh::Director::Models)
original_require(path)
end
end

module Bosh::Director
describe 'sync_dns_scheduler' do
subject(:sync_dns_scheduler) { SyncDnsScheduler.new(config, 0.01) }
let(:config_hash) do
SpecHelper.spec_get_director_config
end

let(:config) { Config.load_hash(config_hash) }
let(:dns_version_converger) { double(DnsVersionConverger) }

before do
Bosh::Director.send(:remove_const, :Models)
end

after do
require 'bosh/director'
end

describe 'migrations' do
let(:migrator) { instance_double(DBMigrator, current?: true) }
before do
allow(config).to receive(:db).and_return(double(:config_db))
allow(DBMigrator).to receive(:new).with(config.db, :director).and_return(migrator)
end

it 'starts up immediately if migrations have finished' do
allow(migrator).to receive(:finished?).and_return(true)
expect(sync_dns_scheduler).to receive(:ensure_migrations)
expect { sync_dns_scheduler.prep }.not_to raise_error
end

it 'raises error if migrations never finish' do
logger = double(Logging::Logger)
allow(config).to receive(:sync_dns_scheduler_logger).and_return(logger.tap { |l| allow(l).to receive(:error) })
allow(migrator).to receive(:finished?).and_return(false)

expect(logger).to receive(:error).with(
/Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts./,
)
expect do
sync_dns_scheduler.prep
end .to raise_error(
/Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries/,
)
end
end
end
end
22 changes: 9 additions & 13 deletions src/bosh-director/spec/unit/worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def require(path)

module Bosh::Director
describe 'worker' do
subject(:worker) { Worker.new(config, 0, 0.01) }
subject(:worker) { Worker.new(config, 0) }
let(:config_hash) do
SpecHelper.spec_get_director_config
end
Expand Down Expand Up @@ -66,32 +66,28 @@ module Bosh::Director
end

it 'starts up immediately if migrations are current' do
allow(migrator).to receive(:current?).once.and_return(true)
allow(migrator).to receive(:finished?).and_return(true)
allow(djworker).to receive(:start)
worker.prep
worker.start
expect(djworker).to have_received(:start)
end

it 'waits until migrations are current to start' do
allow(migrator).to receive(:current?).twice.and_return(false, true)

allow(djworker).to receive(:start)
worker.prep
worker.start

expect(djworker).to have_received(:start)
end

it 'raises error if migrations are never current' do
allow(migrator).to receive(:current?).exactly(Worker::MAX_MIGRATION_ATTEMPTS).times.and_return(false)
allow(migrator).to receive(:finished?).and_return(false)

logger = double(Logging::Logger)
allow(config).to receive(:worker_logger).and_return(logger.tap { |l| allow(l).to receive(:error) })
allow(djworker).to receive(:start)

expect(logger).to receive(:error).with(/Migrations not current during worker start after #{Worker::MAX_MIGRATION_ATTEMPTS} attempts./)
expect { worker.prep }.to raise_error(/Migrations not current after #{Worker::MAX_MIGRATION_ATTEMPTS} retries/)
expect(logger).to receive(:error).with(
/Migrations not current during worker start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts./,
)
expect { worker.prep }.to raise_error(
/Migrations not current after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries/,
)
expect(djworker).not_to have_received(:start)
end
end
Expand Down

0 comments on commit 1f9ca8b

Please sign in to comment.