Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry in sync_dns_scheduler if migration not finished #2154

Merged
merged 2 commits into from
Apr 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions src/bosh-director/bin/bosh-director-sync-dns
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env ruby

require 'bosh/director'
require 'bosh/director/config'
require 'bosh/director/sync_dns_scheduler'
require 'bosh/director/agent_broadcaster'
require 'bosh/director/dns/dns_version_converger'
Expand All @@ -18,24 +18,29 @@ opts.parse!(ARGV.dup)
config_file ||= ::File.expand_path('../../config/bosh-director.yml', __FILE__)
config = Bosh::Director::Config.load_file(config_file)

Bosh::Director::App.new(config)

dns_converger = Bosh::Director::DnsVersionConverger.new(
Bosh::Director::AgentBroadcaster.new,
Bosh::Director::Config.logger,
Bosh::Director::Config.max_threads
)
dns_sync_broadcaster = Bosh::Director::SyncDnsScheduler.new(dns_converger, 10)
dns_sync_broadcaster = Bosh::Director::SyncDnsScheduler.new(config, 10)
dns_sync_broadcaster.prep

%w(TERM INT QUIT).each do |signal|
trap(signal) do
dns_sync_broadcaster.stop!
EM.stop
stop(signal)
end
end

def stop(reason)
Bosh::Director::Config.logger.error("Shutting down bosh-director-sync-dns: #{reason}")
dns_sync_broadcaster.stop!
EM.stop
end

EventMachine.run do
EM.defer do
Thread.new { dns_sync_broadcaster.start! }
Thread.new do
begin
dns_sync_broadcaster.start!
ensure
stop 'Thread terminated'
end
end
end
end
8 changes: 8 additions & 0 deletions src/bosh-director/lib/bosh/director/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,14 @@ def worker_logger
logger
end

def sync_dns_scheduler_logger
logger = Logging::Logger.new('SyncDnsScheduler')
logging_config = hash.fetch('logging', {})
logger.add_appenders(Logging.appenders.stdout('SyncDnsSchedulerIO', layout: ThreadFormatter.layout))
logger.level = Logging.levelify(logging_config.fetch('level', 'debug'))
logger
end

def db
Config.configure_db(hash['db'])
end
Expand Down
38 changes: 36 additions & 2 deletions src/bosh-director/lib/bosh/director/sync_dns_scheduler.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
require 'db_migrator'
require 'optparse'
require 'bosh/director/config'

module Bosh::Director
class SyncDnsScheduler
def initialize(dns_version_converger, interval)
@dns_version_converger = dns_version_converger
def initialize(config, interval)
@config = config
@interval = interval
end

def prep
ensure_migrations

require 'bosh/director'
Bosh::Director::App.new(@config)

@dns_version_converger = Bosh::Director::DnsVersionConverger.new(
Bosh::Director::AgentBroadcaster.new,
Bosh::Director::Config.logger,
Bosh::Director::Config.max_threads,
)
end

def start!
@thread = Thread.new do
loop do
Expand All @@ -23,6 +40,23 @@ def stop!

private

def ensure_migrations
if defined?(Bosh::Director::Models)
raise 'Bosh::Director::Models were loaded before ensuring migrations are current. '\
'Cowardly refusing to start sync dns scheduler.'
end

migrator = DBMigrator.new(@config.db, :director)
raise_migration_error unless migrator.finished?
end

def raise_migration_error
@config.sync_dns_scheduler_logger.error(
"Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts.",
)
raise "Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries"
end

def broadcast
@dns_version_converger.update_instances_based_on_strategy
end
Expand Down
19 changes: 6 additions & 13 deletions src/bosh-director/lib/bosh/director/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@
module Bosh
module Director
class Worker
MAX_MIGRATION_ATTEMPTS = 50

def initialize(config, index = 0, retry_interval = 0.5)
def initialize(config, index = 0)
@config = config
@index = index
@retry_interval = retry_interval
end

def prep
Expand Down Expand Up @@ -64,16 +61,12 @@ def ensure_migrations
raise 'Bosh::Director::Models were loaded before ensuring migrations are current. Cowardly refusing to start worker.'
end
migrator = DBMigrator.new(@config.db, :director)
tries = 0
until migrator.current?
tries += 1
sleep @retry_interval
if tries >= MAX_MIGRATION_ATTEMPTS
@config.worker_logger.error("Migrations not current during worker start after #{tries} attempts.")
raise "Migrations not current after #{MAX_MIGRATION_ATTEMPTS} retries"
end
unless migrator.finished?
@config.worker_logger.error(
"Migrations not current during worker start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts.",
)
raise "Migrations not current after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries"
end

require 'bosh/director'
end

Expand Down
15 changes: 14 additions & 1 deletion src/bosh-director/lib/db_migrator.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
require 'sequel'

class DBMigrator
def initialize(database, type, options = {})
MAX_MIGRATION_ATTEMPTS = 50

def initialize(database, type, options = {}, retry_interval = 0.5)
@migrator = case type
when :cpi then cpi_migrator(database, options)
when :director then director_migrator(database, options)
when :dns then dns_migrator(database, options)
end
@retry_interval = retry_interval
end

def current?
Expand All @@ -17,6 +20,16 @@ def migrate
@migrator&.run
end

def finished?
tries = 0
until current?
tries += 1
sleep @retry_interval
return false if tries >= MAX_MIGRATION_ATTEMPTS
end
true
end

private

def migrator(database, directory, options)
Expand Down
25 changes: 25 additions & 0 deletions src/bosh-director/spec/unit/db_migrator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
require 'spec_helper'
require 'db_migrator'

module Bosh::Director
describe 'DBMigrator' do
subject(:migrator) { DBMigrator.new('db', 'test', {}, 0.01) }

describe '#finished?' do
it 'returns true if migration is already current' do
allow(migrator).to receive(:current?).once.and_return(true)
expect(migrator.finished?).to be(true)
end

it 'return true if migration is current after retrying' do
allow(migrator).to receive(:current?).twice.and_return(false, true)
expect(migrator.finished?).to be(true)
end

it 'returns false if migrations are never current' do
allow(migrator).to receive(:current?).exactly(DBMigrator::MAX_MIGRATION_ATTEMPTS).times.and_return(false)
expect(migrator.finished?).to be(false)
end
end
end
end
61 changes: 61 additions & 0 deletions src/bosh-director/spec/unit/sync_dns_scheduler_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
require 'spec_helper'
require 'db_migrator'
require_relative '../../lib/bosh/director/sync_dns_scheduler'

Models = Bosh::Director::Models
module Kernel
alias original_require require
def require(path)
Bosh::Director.const_set(:Models, Models) if path == 'bosh/director' && !defined?(Bosh::Director::Models)
original_require(path)
end
end

module Bosh::Director
describe 'sync_dns_scheduler' do
subject(:sync_dns_scheduler) { SyncDnsScheduler.new(config, 0.01) }
let(:config_hash) do
SpecHelper.spec_get_director_config
end

let(:config) { Config.load_hash(config_hash) }
let(:dns_version_converger) { double(DnsVersionConverger) }

before do
Bosh::Director.send(:remove_const, :Models)
end

after do
require 'bosh/director'
end

describe 'migrations' do
let(:migrator) { instance_double(DBMigrator, current?: true) }
before do
allow(config).to receive(:db).and_return(double(:config_db))
allow(DBMigrator).to receive(:new).with(config.db, :director).and_return(migrator)
end

it 'starts up immediately if migrations have finished' do
allow(migrator).to receive(:finished?).and_return(true)
expect(sync_dns_scheduler).to receive(:ensure_migrations)
expect { sync_dns_scheduler.prep }.not_to raise_error
end

it 'raises error if migrations never finish' do
logger = double(Logging::Logger)
allow(config).to receive(:sync_dns_scheduler_logger).and_return(logger.tap { |l| allow(l).to receive(:error) })
allow(migrator).to receive(:finished?).and_return(false)

expect(logger).to receive(:error).with(
/Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts./,
)
expect do
sync_dns_scheduler.prep
end .to raise_error(
/Migrations not current during sync dns scheduler start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries/,
)
end
end
end
end
22 changes: 9 additions & 13 deletions src/bosh-director/spec/unit/worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def require(path)

module Bosh::Director
describe 'worker' do
subject(:worker) { Worker.new(config, 0, 0.01) }
subject(:worker) { Worker.new(config, 0) }
let(:config_hash) do
SpecHelper.spec_get_director_config
end
Expand Down Expand Up @@ -66,32 +66,28 @@ module Bosh::Director
end

it 'starts up immediately if migrations are current' do
allow(migrator).to receive(:current?).once.and_return(true)
allow(migrator).to receive(:finished?).and_return(true)
allow(djworker).to receive(:start)
worker.prep
worker.start
expect(djworker).to have_received(:start)
end

it 'waits until migrations are current to start' do
allow(migrator).to receive(:current?).twice.and_return(false, true)

allow(djworker).to receive(:start)
worker.prep
worker.start

expect(djworker).to have_received(:start)
end

it 'raises error if migrations are never current' do
allow(migrator).to receive(:current?).exactly(Worker::MAX_MIGRATION_ATTEMPTS).times.and_return(false)
allow(migrator).to receive(:finished?).and_return(false)

logger = double(Logging::Logger)
allow(config).to receive(:worker_logger).and_return(logger.tap { |l| allow(l).to receive(:error) })
allow(djworker).to receive(:start)

expect(logger).to receive(:error).with(/Migrations not current during worker start after #{Worker::MAX_MIGRATION_ATTEMPTS} attempts./)
expect { worker.prep }.to raise_error(/Migrations not current after #{Worker::MAX_MIGRATION_ATTEMPTS} retries/)
expect(logger).to receive(:error).with(
/Migrations not current during worker start after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} attempts./,
)
expect { worker.prep }.to raise_error(
/Migrations not current after #{DBMigrator::MAX_MIGRATION_ATTEMPTS} retries/,
)
expect(djworker).not_to have_received(:start)
end
end
Expand Down