Skip to content

Commit

Permalink
TOOLS-2215 retry ssl errors so deploys are more stable
Browse files Browse the repository at this point in the history
SSL errors and a few other connection errors are currently not retried, which leads to failing deploys
ManageIQ/kubeclient#240
  • Loading branch information
grosser committed Nov 21, 2017
1 parent 2ff235f commit 6c1823c
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def update_secret(namespace)
# Checks whether the given secret can be fetched from the cluster.
# secret is a hash whose :metadata entry holds :name and :namespace; both are read
# with fetch, so a missing key raises KeyError instead of being treated as "not found".
# Returns true when get_secret succeeds, false when it fails with a rescued connection error.
def secret_exist?(secret)
@cluster.client.get_secret(secret.fetch(:metadata).fetch(:name), secret.fetch(:metadata).fetch(:namespace))
true
# NOTE(review): this is a diff view; the bare KubeException rescue appears to be the removed
# line and the connection_errors rescue its replacement. Confirm against the repository.
rescue KubeException
rescue *SamsonKubernetes.connection_errors
false
end
end
4 changes: 2 additions & 2 deletions plugins/kubernetes/app/models/kubernetes/api/pod.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ def containers
# tries to get logs from current or previous pod depending on if it restarted
# Passes container and end_time through to fetch_logs, first with previous: restarted?
# and, if that fails with a rescued error, once more with the opposite previous: flag.
# Returns nil when the second attempt fails with a rescued error as well.
def logs(container, end_time)
fetch_logs(container, end_time, previous: restarted?)
# NOTE(review): this is a diff view; each KubeException rescue below appears to be the removed
# line and the connection_errors rescue after it the replacement. Confirm against the repository.
rescue KubeException # not found or pod is initializing
rescue *SamsonKubernetes.connection_errors # not found or pod is initializing
begin
# fallback: ask for the other set of logs (previous vs. current) than the first attempt
fetch_logs(container, end_time, previous: !restarted?)
rescue KubeException
rescue *SamsonKubernetes.connection_errors
nil
end
end
Expand Down
4 changes: 2 additions & 2 deletions plugins/kubernetes/app/models/kubernetes/cluster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def namespaces

# True only when connection_valid? holds and namespaces includes the given namespace.
# A rescued connection error while checking is reported as false rather than raised.
def namespace_exists?(namespace)
connection_valid? && namespaces.include?(namespace)
# NOTE(review): this is a diff view; the KubeException rescue appears to be the removed line
# and the connection_errors rescue its replacement. Confirm against the repository.
rescue KubeException
rescue *SamsonKubernetes.connection_errors
false
end

Expand All @@ -63,7 +63,7 @@ def schedulable_nodes

# Returns the result of client.api_valid?, or false when that call fails with a rescued
# connection error.
def connection_valid?
client.api_valid?
# NOTE(review): this is a diff view; the "KubeException, Errno::ECONNREFUSED" rescue appears to be
# the removed line and the connection_errors rescue its replacement. Confirm against the repository.
rescue KubeException, Errno::ECONNREFUSED
rescue *SamsonKubernetes.connection_errors
false
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def pod_statuses(release, release_docs)
# efficient pod fetching by querying once per cluster instead of once per deploy group
# Returns one flat array of Kubernetes::Api::Pod objects for the release.
# release.clients is iterated as [client, query] pairs; each query is sent to its client
# via get_pods, wrapped in Vault.with_retries(..., attempts: 3).
def fetch_pods(release)
release.clients.flat_map do |client, query|
# NOTE(review): this is a diff view; the first assignment (retrying only KubeException) appears to
# be the removed line and the second its replacement. Confirm against the repository.
pods = Vault.with_retries(KubeException, attempts: 3) { client.get_pods(query) }
pods = Vault.with_retries(*SamsonKubernetes.connection_errors, attempts: 3) { client.get_pods(query) }
# wrap the raw pods in place, handing each wrapper the client that fetched it
pods.map! { |p| Kubernetes::Api::Pod.new(p, client: client) }
end
end
Expand Down
2 changes: 1 addition & 1 deletion plugins/kubernetes/app/models/kubernetes/resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def update
# Fetches this resource (by name and namespace) as raw JSON and parses it into a hash
# with symbol keys.
# Returns nil when the request fails with an error whose error_code is 404.
# Re-raises every other rescued connection error unchanged.
def fetch_resource
  reply = request(:get, name, namespace, as: :raw)
  JSON.parse(reply, symbolize_names: true)
rescue *SamsonKubernetes.connection_errors => e
  # ssl and connection-refused errors do not respond to error_code, so guard before reading it;
  # otherwise a NoMethodError would replace the real failure
  raise unless e.respond_to?(:error_code) && e.error_code == 404
  nil
end
Expand Down
7 changes: 7 additions & 0 deletions plugins/kubernetes/lib/samson_kubernetes/samson_plugin.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ class Engine < Rails::Engine
app.config.assets.precompile.append %w[kubernetes/icon.png]
end
end

# Exception classes callers should ignore/retry when talking to the kubernetes api.
# http errors and ssl errors are not handled uniformly, so both are listed here,
# see https://github.com/abonas/kubeclient/issues/240
# This is a method so that kubeclient does not need to be loaded on every boot (~0.1s).
def self.connection_errors
  errors = [OpenSSL::SSL::SSLError, KubeException, Errno::ECONNREFUSED]
  errors.freeze
end
end

Samson::Hooks.view :project_tabs_view, 'samson_kubernetes/project_tab'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -547,5 +547,12 @@ def worker_is_unstable
executor.send(:fetch_pods, kubernetes_releases(:test_release))
end
end

it "retries on ssl failure" do
  ssl_error = OpenSSL::SSL::SSLError.new
  # fetch_pods retries with attempts: 3; presumably 1 initial call + 3 retries = 4 calls
  Kubeclient::Client.any_instance.expects(:get_pods).times(4).raises(ssl_error)
  release = kubernetes_releases(:test_release)
  assert_raises(OpenSSL::SSL::SSLError) { executor.send(:fetch_pods, release) }
end
end
end
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true
require_relative "../test_helper"
require "kubeclient"

SingleCov.covered!

Expand Down Expand Up @@ -50,4 +51,10 @@ def link_parts(resource)
link_parts(cluster).must_equal ["test", cluster]
end
end

describe ".connection_errors" do
  # assert the contents so the example fails if an error class is dropped from the list,
  # instead of only checking that the call does not raise
  it "includes ssl, http and connection-refused errors" do
    errors = SamsonKubernetes.connection_errors
    errors.must_include OpenSSL::SSL::SSLError
    errors.must_include KubeException
    errors.must_include Errno::ECONNREFUSED
  end
end
end

0 comments on commit 6c1823c

Please sign in to comment.