diff --git a/.gitignore b/.gitignore index cdea425..42c2603 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ test/version_tmp tmp tags .DS_Store +vendor/ diff --git a/README.md b/README.md index ce44dd5..5153ebc 100644 --- a/README.md +++ b/README.md @@ -140,18 +140,19 @@ a drop-in replacement for your existing pure redis client usage. The full set of options that can be passed to RedisFailover::Client are: - :zk - an existing ZK client instance - :zkservers - comma-separated ZooKeeper host:port pairs - :znode_path - the Znode path override for redis server list (optional) - :password - password for redis nodes (optional) - :db - db to use for redis nodes (optional) - :namespace - namespace for redis nodes (optional) - :logger - logger override (optional) - :retry_failure - indicate if failures should be retried (default true) - :max_retries - max retries for a failure (default 3) - :safe_mode - indicates if safe mode is used or not (default true) - :master_only - indicates if only redis master is used (default false) - :verify_role - verify the actual role of a redis node before every command (default true) + :zk - an existing ZK client instance + :zkservers - comma-separated ZooKeeper host:port pairs + :znode_path - the Znode path override for redis server list (optional) + :password - password for redis nodes (optional) + :db - db to use for redis nodes (optional) + :namespace - namespace for redis nodes (optional) + :trace_id - trace string tag logged for client debugging (optional) + :logger - logger override (optional) + :retry_failure - indicate if failures should be retried (default true) + :max_retries - max retries for a failure (default 3) + :safe_mode - indicates if safe mode is used or not (default true) + :master_only - indicates if only redis master is used (default false) + :verify_role - verify the actual role of a redis node before every command (default true) The RedisFailover::Client also supports a custom callback that will be invoked whenever the list of redis clients changes. Example usage: diff --git a/lib/redis_failover.rb b/lib/redis_failover.rb index d8038e7..81b897c 100644 --- a/lib/redis_failover.rb +++ b/lib/redis_failover.rb @@ -1,4 +1,10 @@ require 'zk' + +#NOTE: We've found that using the 'recommended' zk fork-hook would trigger +#kernel mutex deadlocks in forking env (unicorn & resque) [ruby 1.9] +#https://github.com/zk-ruby/zk/wiki/Forking & https://github.com/zk-ruby/zk/blob/master/RELEASES.markdown#v150 +#ZK.install_fork_hook + require 'set' require 'yaml' require 'redis' diff --git a/lib/redis_failover/client.rb b/lib/redis_failover/client.rb index 9e17f31..bcd5112 100644 --- a/lib/redis_failover/client.rb +++ b/lib/redis_failover/client.rb @@ -51,6 +51,7 @@ def call(command, &block) # @option options [String] :password password for redis nodes # @option options [String] :db database to use for redis nodes # @option options [String] :namespace namespace for redis nodes + # @option options [String] :trace_id trace string tag logged for client debugging # @option options [Logger] :logger logger override # @option options [Boolean] :retry_failure indicates if failures are retried # @option options [Integer] :max_retries max retries for a failure @@ -61,6 +62,7 @@ def call(command, &block) # @return [RedisFailover::Client] def initialize(options = {}) Util.logger = options[:logger] if options[:logger] + @trace_id = options[:trace_id] @master = nil @slaves = [] @node_addresses = {} @@ -130,7 +132,7 @@ def respond_to_missing?(method, include_private) # @return [String] a string representation of the client def inspect - "#" + "#" end alias_method :to_s, :inspect @@ -157,13 +159,12 @@ def shutdown purge_clients end - # Reconnect will first perform a shutdown of the underlying redis clients. - # Next, it attempts to reopen the ZooKeeper client and re-create the redis - # clients after it fetches the most up-to-date list from ZooKeeper. + # Reconnect method needed for compatibility with 3rd party libs that expect this for redis client objects. def reconnect - purge_clients - @zk ? @zk.reopen : setup_zk - build_clients + #NOTE: Explicit/manual reconnects are no longer needed or desired, and + #triggered kernel mutex deadlocks in forking env (unicorn & resque) [ruby 1.9] + #Resque automatically calls this method on job fork. + #We now auto-detect underlying zk & redis client InheritedError's and reconnect automatically as needed. end # Retrieves the current redis master. @@ -235,7 +236,12 @@ def dispatch(method, *args, &block) verify_supported!(method) tries = 0 begin - client_for(method).send(method, *args, &block) + redis = client_for(method) + redis.send(method, *args, &block) + rescue ::Redis::InheritedError => ex + logger.debug( "Caught #{ex.class} - reconnecting [#{@trace_id}] #{redis.inspect}" ) + redis.client.reconnect + retry rescue *CONNECTIVITY_ERRORS => ex logger.error("Error while handling `#{method}` - #{ex.inspect}") logger.error(ex.backtrace.join("\n")) @@ -243,8 +249,8 @@ def dispatch(method, *args, &block) if tries < @max_retries tries += 1 free_client - build_clients sleep(RETRY_WAIT_TIME) + build_clients retry end raise @@ -288,7 +294,7 @@ def build_clients return unless nodes_changed?(nodes) purge_clients - logger.info("Building new clients for nodes #{nodes.inspect}") + logger.info("Building new clients for nodes [#{@trace_id}] #{nodes.inspect}") new_master = new_clients_for(nodes[:master]).first if nodes[:master] new_slaves = new_clients_for(*nodes[:slaves]) @master = new_master @@ -320,19 +326,37 @@ def should_notify? # # @return [Hash] the known master/slave redis servers def fetch_nodes - data = @zk.get(redis_nodes_path, :watch => true).first - nodes = symbolize_keys(decode(data)) - logger.debug("Fetched nodes: #{nodes.inspect}") + tries = 0 + begin + data = @zk.get(redis_nodes_path, :watch => true).first + nodes = symbolize_keys(decode(data)) + logger.debug("Fetched nodes: #{nodes.inspect}") + nodes + rescue Zookeeper::Exceptions::InheritedConnectionError, ZK::Exceptions::InterruptedSession => ex + logger.debug { "Caught #{ex.class} '#{ex.message}' - reopening ZK client [#{@trace_id}]" } + sleep 1 if ex.kind_of?(ZK::Exceptions::InterruptedSession) + @zk.reopen + retry + rescue *ZK_ERRORS => ex + logger.error { "Caught #{ex.class} '#{ex.message}' - retrying ... [#{@trace_id}]" } + sleep(RETRY_WAIT_TIME) - nodes - rescue Zookeeper::Exceptions::InheritedConnectionError, ZK::Exceptions::InterruptedSession => ex - logger.debug { "Caught #{ex.class} '#{ex.message}' - reopening ZK client" } - @zk.reopen - retry - rescue *ZK_ERRORS => ex - logger.warn { "Caught #{ex.class} '#{ex.message}' - retrying" } - sleep(RETRY_WAIT_TIME) - retry + if tries < @max_retries + tries += 1 + retry + elsif tries < (@max_retries * 2) + tries += 1 + logger.error { "Hmmm, more than [#{@max_retries}] retries: reopening ZK client [#{@trace_id}]" } + @zk.reopen + retry + else + tries = 0 + logger.error { "Oops, more than [#{@max_retries * 2}] retries: establishing fresh ZK client [#{@trace_id}]" } + @zk.close! + setup_zk + retry + end + end end # Builds new Redis clients for the specified nodes. @@ -434,7 +458,7 @@ def disconnect(*redis_clients) # Disconnects current redis clients. def purge_clients @lock.synchronize do - logger.info("Purging current redis clients") + logger.info("Purging current redis clients [#{@trace_id}]") disconnect(@master, *@slaves) @master = nil @slaves = [] diff --git a/redis_failover.gemspec b/redis_failover.gemspec index 40c2d0b..b63dcf5 100644 --- a/redis_failover.gemspec +++ b/redis_failover.gemspec @@ -18,7 +18,7 @@ Gem::Specification.new do |gem| gem.add_dependency('redis', ['>= 2.2', '< 4']) gem.add_dependency('redis-namespace') gem.add_dependency('multi_json', '~> 1') - gem.add_dependency('zk', ['>= 1.7.4', '< 1.8']) + gem.add_dependency('zk', ['>= 1.7.4', '< 2.0']) gem.add_development_dependency('rake') gem.add_development_dependency('rspec') diff --git a/spec/client_spec.rb b/spec/client_spec.rb index df72244..42026fe 100644 --- a/spec/client_spec.rb +++ b/spec/client_spec.rb @@ -54,16 +54,23 @@ def setup_zk client.del('foo') called.should be_true end + end describe '#inspect' do it 'should always include db' do opts = {:zkservers => 'localhost:1234'} client = ClientStub.new(opts) - client.inspect.should match(' db) client = ClientStub.new(opts) - client.inspect.should match(" 'localhost:1234', :trace_id => tid) + client.inspect.should match("