From 826f579dab1a5dd431a46daef80799609e277937 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Thu, 29 Mar 2018 17:16:17 -0500 Subject: [PATCH 1/3] Adds tests for Rbac::Filterer#include_references --- spec/lib/rbac/filterer_spec.rb | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/spec/lib/rbac/filterer_spec.rb b/spec/lib/rbac/filterer_spec.rb index 9183ecf277f..07e2c991d54 100644 --- a/spec/lib/rbac/filterer_spec.rb +++ b/spec/lib/rbac/filterer_spec.rb @@ -1821,6 +1821,48 @@ def get_rbac_results_for_and_expect_objects(klass, expected_objects) end end + describe "#include_references (private)" do + subject { described_class.new } + + let(:klass) { VmOrTemplate } + let(:scope) { klass.all } + let(:include_for_find) { { :miq_server => {} } } + let(:exp_includes) { { :host => {} } } + + it "adds include_for_find .references to the scope" do + method_args = [scope, klass, include_for_find, nil] + resulting_scope = subject.send(:include_references, *method_args) + + expect(resulting_scope.references_values).to eq(["{:miq_server=>{}}", ""]) + end + + it "adds exp_includes .references to the scope" do + method_args = [scope, klass, nil, exp_includes] + resulting_scope = subject.send(:include_references, *method_args) + + expect(resulting_scope.references_values).to eq(["", "{:host=>{}}"]) + end + + it "adds include_for_find and exp_includes .references to the scope" do + method_args = [scope, klass, include_for_find, exp_includes] + resulting_scope = subject.send(:include_references, *method_args) + + expect(resulting_scope.references_values).to eq(["{:miq_server=>{}}", "{:host=>{}}"]) + end + + context "if the include is polymorphic" do + let(:klass) { MetricRollup } + let(:include_for_find) { { :resource => {} } } + + it "does not add .references to the scope" do + method_args = [scope, klass, include_for_find, nil] + resulting_scope = subject.send(:include_references, *method_args) + + expect(resulting_scope.references_values).to eq([]) + end + end + end + it ".apply_rbac_directly?" do expect(described_class.new.send(:apply_rbac_directly?, Vm)).to be_truthy expect(described_class.new.send(:apply_rbac_directly?, Rbac)).not_to be From ab09c7ca3163db40574d7f1a1121c02a64da8648 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Mon, 12 Mar 2018 19:54:48 -0500 Subject: [PATCH 2/3] Skip query references in Rbac when not needed Makes calling `.references` on the Rbac scope more conservative, since it has the possibility of causing some nasty join bombs. The change to the CloudTenancyMixin was made so that it can be in charge of it's own specific references instead of relying on a quirk of `ActiveRecord`, in which `references(nil)` will call references for all of the `includes` values. Also a few models that overwrote the tenant_joins_clause method had to be updated as wel. Moved some tests around to also provide some tests in a more "integration" based fashion. Should be mostly additive, and the tests that I moved are ones that I had written myself and weren't really in the best spot. This is a bandage for for the following issue: * * * Given roughly these kind of table counts container_images: 700 containers: 2500 openscap_results: 100 openscap_rule_results: 70000 custom_attributes: 20000 Where half of the custom_attributes are associated with container_images. If the following MiqReport is run: cols: - name - virtual_custom_attribute_build-date:SECTION:docker_labels - virtual_custom_attribute_io.openshift.build.name:SECTION:docker_labels - virtual_custom_attribute_io.openshift.build.namespace:SECTION:docker_labels include: openscap_rule_results: columns: - severity containers: columns: - state (`:custom_attributes => {}` is also part of the resulting `include`, but pretty sure that gets tacked on in the `MiqReport#generate` call) Without needing a report, the query that gets executed can be replicated by doing the following: irb> includes_for_find = { :openscap_rule_results => {}, :containers => {}, :custom_attributes => {} } irb> ContainerImages.includes(includes_for_find) .references(includes_for_find) .to_sql There would be a "LEFT JOIN" bomb that would end up causing about 40GB of data being returned from the database. In this case as well, there is nothing that is making use of the references for the result, but the extra table data would be returned from the query prior to this commit in a very inefficient manner with loads of duplicate data. --- app/models/cloud_tenant.rb | 3 +- app/models/flavor.rb | 3 +- app/models/mixins/cloud_tenancy_mixin.rb | 8 +- lib/rbac/filterer.rb | 20 ++- spec/lib/rbac/filterer_spec.rb | 129 ++++++++++++++++-- .../models/mixins/cloud_tenancy_mixin_spec.rb | 25 ++++ 6 files changed, 173 insertions(+), 15 deletions(-) diff --git a/app/models/cloud_tenant.rb b/app/models/cloud_tenant.rb index 1fa881695ed..39a78f16d53 100644 --- a/app/models/cloud_tenant.rb +++ b/app/models/cloud_tenant.rb @@ -163,6 +163,7 @@ def self.post_refresh_ems(ems_id, _) end def self.tenant_joins_clause(scope) - scope.includes(:source_tenant).includes(:ext_management_system) + scope.includes(:source_tenant, :ext_management_system) + .references(:source_tenant, :ext_management_system) end end diff --git a/app/models/flavor.rb b/app/models/flavor.rb index 62a5fa5258d..0b13d3fdc66 100644 --- a/app/models/flavor.rb +++ b/app/models/flavor.rb @@ -44,7 +44,8 @@ def self.class_by_ems(ext_management_system) end def self.tenant_joins_clause(scope) - scope.includes(:cloud_tenants => "source_tenant").includes(:ext_management_system) + scope.includes(:cloud_tenants => "source_tenant", :ext_management_system => {}) + .references(:cloud_tenants => "source_tenant", :ext_management_system => {}) end def self.create_flavor_queue(userid, ext_management_system, options = {}) diff --git a/app/models/mixins/cloud_tenancy_mixin.rb b/app/models/mixins/cloud_tenancy_mixin.rb index 75313422f47..e57306b6699 100644 --- a/app/models/mixins/cloud_tenancy_mixin.rb +++ b/app/models/mixins/cloud_tenancy_mixin.rb @@ -1,6 +1,11 @@ module CloudTenancyMixin extend ActiveSupport::Concern + QUERY_REFERENCES = { + :cloud_tenant => "source_tenant", + :ext_management_system => {} + }.freeze + module ClassMethods include TenancyCommonMixin @@ -13,7 +18,8 @@ def tenant_id_clause_format(tenant_ids) end def tenant_joins_clause(scope) - scope.includes(:cloud_tenant => "source_tenant").includes(:ext_management_system) + scope.includes(QUERY_REFERENCES) + .references(QUERY_REFERENCES) # needed for the where to work end end end diff --git a/lib/rbac/filterer.rb b/lib/rbac/filterer.rb index d4ebda7a9e4..1d5d89dab50 100644 --- a/lib/rbac/filterer.rb +++ b/lib/rbac/filterer.rb @@ -248,6 +248,7 @@ def search(options = {}) exp_sql, exp_includes, exp_attrs = search_filter.to_sql(tz) if search_filter && !klass.try(:instances_are_derived?) attrs[:apply_limit_in_sql] = (exp_attrs.nil? || exp_attrs[:supported_by_sql]) && user_filters["belongsto"].blank? + skip_references = skip_references?(options, attrs) # for belongs_to filters, scope_targets uses scope to make queries. want to remove limits for those. # if you note, the limits are put back into scope a few lines down from here @@ -257,7 +258,7 @@ def search(options = {}) .includes(include_for_find).includes(exp_includes) .order(order) - scope = include_references(scope, klass, include_for_find, exp_includes) + scope = include_references(scope, klass, include_for_find, exp_includes, skip_references) scope = scope.limit(limit).offset(offset) if attrs[:apply_limit_in_sql] targets = scope @@ -291,7 +292,22 @@ def is_sti?(klass) klass.respond_to?(:finder_needs_type_condition?) ? klass.finder_needs_type_condition? : false end - def include_references(scope, klass, include_for_find, exp_includes) + # This is a very primitive way of determining whether we want to skip + # adding references to the query. + # + # For now, basically it checks if the caller has not provided :extra_cols, + # or if the MiqExpression can't apply the limit in SQL. If both of those + # are true, then we don't add `.references` to the scope. + # + # If still invalid, there is an EXPLAIN check in #include_references that + # will make sure the query is valid and if not, will include the references + # as done previously. + def skip_references?(options, attrs) + options[:extra_cols].blank? && !attrs[:apply_limit_in_sql] + end + + def include_references(scope, klass, include_for_find, exp_includes, skip) + return scope if skip ref_includes = Hash(include_for_find).merge(Hash(exp_includes)) unless polymorphic_include?(klass, ref_includes) scope = scope.references(include_for_find).references(exp_includes) diff --git a/spec/lib/rbac/filterer_spec.rb b/spec/lib/rbac/filterer_spec.rb index 07e2c991d54..cad0a703231 100644 --- a/spec/lib/rbac/filterer_spec.rb +++ b/spec/lib/rbac/filterer_spec.rb @@ -329,6 +329,109 @@ def combine_filtered_ids(user_filtered_ids, belongsto_filtered_ids, managed_filt end end + context "with non-sql filter" do + subject { described_class.new } + + let(:expression) { MiqExpression.new("=" => {"field" => "Vm-vendor_display", "value" => "VMware"}) } + let(:search_attributes) { { :class => "Vm", :filter => expression } } + let(:results) { subject.search(search_attributes).first } + + before { [owned_vm, other_vm] } + + it "finds the Vms" do + expect(results.to_a).to match_array [owned_vm, other_vm] + expect(results.count).to eq 2 + end + + it "does not add references without includes" do + expect(subject).to receive(:include_references).with(anything, Vm, nil, nil, true).and_call_original + results + end + + context "with :include_for_find" do + let(:include_search) { search_attributes.merge(:include_for_find => {:evm_owner => {}}) } + let(:results) { subject.search(include_search).first } + + it "finds the Vms" do + expect(results.to_a).to match_array [owned_vm, other_vm] + expect(results.count).to eq 2 + end + + it "does not add references since there isn't a SQL filter" do + expect(subject).to receive(:include_references).with(anything, Vm, {:evm_owner => {}}, nil, true).and_call_original + results + end + end + end + + context "with a miq_expression filter on vms" do + let(:expression) { MiqExpression.new("=" => {"field" => "Vm-vendor", "value" => "vmware"}) } + let(:search_attributes) { { :class => "Vm", :filter => expression } } + let(:results) { described_class.search(search_attributes).first } + + before { [owned_vm, other_vm] } + + it "finds the Vms" do + expect(results.to_a).to match_array [owned_vm, other_vm] + expect(results.count).to eq 2 + end + + it "does not add references without includes" do + # empty string here is basically passing `.references(nil)`, and the + # extra empty hash here is from the MiqExpression (which will result in + # the same), both of which will no-op to when determining if there are + # joins in ActiveRecord, and will not create a JoinDependency query + expect(results.references_values).to match_array ["", "{}"] + end + + context "with :include_for_find" do + let(:include_search) { search_attributes.merge(:include_for_find => {:evm_owner => {}}) } + let(:results) { described_class.search(include_search).first } + + it "finds the Service" do + expect(results.to_a).to match_array [owned_vm, other_vm] + expect(results.count).to eq 2 + end + + it "adds references" do + expect(results.references_values).to match_array ["{:evm_owner=>{}}", "{}"] + end + end + end + + context "with :extra_cols on a Service" do + let(:extra_cols) { [:owned_by_current_user] } + let(:search_attributes) { { :class => "Service", :extra_cols => extra_cols } } + let(:results) { described_class.search(search_attributes).first } + + before { FactoryGirl.create :service, :evm_owner => owner_user } + + it "finds the Service" do + expect(results.first.attributes["owned_by_current_user"]).to be false + end + + it "does not add references with no includes" do + # The single empty string is the result of a nil from both the lack of + # a MiqExpression filter and the user filter, which is deduped in + # ActiveRecord's internals and results in a `.references(nil)` + # effectively + expect(results.references_values).to match_array [""] + end + + context "with :include_for_find" do + let(:include_search) { search_attributes.merge(:include_for_find => {:evm_owner => {}}) } + let(:results) { described_class.search(include_search).first } + + it "finds the Service" do + expect(results.first.attributes["owned_by_current_user"]).to be false + end + + it "adds references" do + expect(results.references_values).to match_array ["", "{:evm_owner=>{}}"] + end + end + end + describe "with find_options_for_tenant filtering" do before do owned_vm # happy path @@ -362,12 +465,6 @@ def combine_filtered_ids(user_filtered_ids, belongsto_filtered_ids, managed_filt expect(results).to match_array [owned_vm] end - it "with :extra_cols finds Service" do - FactoryGirl.create :service, :evm_owner => owner_user - results = described_class.search(:class => "Service", :extra_cols => [:owned_by_current_user]).first - expect(results.first.attributes["owned_by_current_user"]).to be false - end - it "leaving tenant doesnt find Vm" do owner_user.update_attributes(:miq_groups => [other_user.current_group]) User.with_user(owner_user) do @@ -1824,27 +1921,28 @@ def get_rbac_results_for_and_expect_objects(klass, expected_objects) describe "#include_references (private)" do subject { described_class.new } + let(:skip) { false } let(:klass) { VmOrTemplate } let(:scope) { klass.all } let(:include_for_find) { { :miq_server => {} } } let(:exp_includes) { { :host => {} } } it "adds include_for_find .references to the scope" do - method_args = [scope, klass, include_for_find, nil] + method_args = [scope, klass, include_for_find, nil, skip] resulting_scope = subject.send(:include_references, *method_args) expect(resulting_scope.references_values).to eq(["{:miq_server=>{}}", ""]) end it "adds exp_includes .references to the scope" do - method_args = [scope, klass, nil, exp_includes] + method_args = [scope, klass, nil, exp_includes, skip] resulting_scope = subject.send(:include_references, *method_args) expect(resulting_scope.references_values).to eq(["", "{:host=>{}}"]) end it "adds include_for_find and exp_includes .references to the scope" do - method_args = [scope, klass, include_for_find, exp_includes] + method_args = [scope, klass, include_for_find, exp_includes, skip] resulting_scope = subject.send(:include_references, *method_args) expect(resulting_scope.references_values).to eq(["{:miq_server=>{}}", "{:host=>{}}"]) @@ -1855,7 +1953,18 @@ def get_rbac_results_for_and_expect_objects(klass, expected_objects) let(:include_for_find) { { :resource => {} } } it "does not add .references to the scope" do - method_args = [scope, klass, include_for_find, nil] + method_args = [scope, klass, include_for_find, nil, skip] + resulting_scope = subject.send(:include_references, *method_args) + + expect(resulting_scope.references_values).to eq([]) + end + end + + context "when skip is passed as true" do + let(:skip) { true } + + it "does not add .references to the scope" do + method_args = [scope, klass, include_for_find, exp_includes, skip] resulting_scope = subject.send(:include_references, *method_args) expect(resulting_scope.references_values).to eq([]) diff --git a/spec/models/mixins/cloud_tenancy_mixin_spec.rb b/spec/models/mixins/cloud_tenancy_mixin_spec.rb index 349c7413577..9026ffe411d 100644 --- a/spec/models/mixins/cloud_tenancy_mixin_spec.rb +++ b/spec/models/mixins/cloud_tenancy_mixin_spec.rb @@ -16,10 +16,35 @@ it "finds correct tenant id clause for regular tenants" do expect(VmOrTemplate.tenant_id_clause(user)).to eql ["(vms.template = true AND vms.tenant_id IN (?)) OR (vms.template = false AND vms.tenant_id IN (?))", [default_tenant.id, tenant.id], [tenant.id]] + + query = VmOrTemplate.where(VmOrTemplate.tenant_id_clause(user)) + expect { query.load }.not_to raise_error end it "finds correct tenant id clause for cloud tenants" do expect(CloudVolume.tenant_id_clause(user)).to eql ["(tenants.id IN (?) AND ext_management_systems.tenant_mapping_enabled IS TRUE) OR ext_management_systems.tenant_mapping_enabled IS FALSE OR ext_management_systems.tenant_mapping_enabled IS NULL", [tenant.id]] + + query = CloudVolume.tenant_joins_clause(CloudVolume.all) + .where(CloudVolume.tenant_id_clause(user)) + expect { query.load }.not_to raise_error + end + + # Overwrites CloudTenancyMixin.tenant_joins_clause + it "finds correct tenant id clause for flavors" do + expect(Flavor.tenant_id_clause(user)).to eql ["(tenants.id IN (?) AND ext_management_systems.tenant_mapping_enabled IS TRUE) OR ext_management_systems.tenant_mapping_enabled IS FALSE OR ext_management_systems.tenant_mapping_enabled IS NULL", [tenant.id]] + + query = Flavor.tenant_joins_clause(Flavor.all) + .where(Flavor.tenant_id_clause(user)) + expect { query.load }.not_to raise_error + end + + # Overwrites CloudTenancyMixin.tenant_joins_clause + it "finds correct tenant id clause for cloud tenants" do + expect(CloudTenant.tenant_id_clause(user)).to eql ["(tenants.id IN (?) AND ext_management_systems.tenant_mapping_enabled IS TRUE) OR ext_management_systems.tenant_mapping_enabled IS FALSE OR ext_management_systems.tenant_mapping_enabled IS NULL", [tenant.id]] + + query = CloudTenant.tenant_joins_clause(CloudTenant.all) + .where(CloudTenant.tenant_id_clause(user)) + expect { query.load }.not_to raise_error end end end From fcc46332d7d8c329bd43d0ce258c2a3a76a81f48 Mon Sep 17 00:00:00 2001 From: Nick LaMuro Date: Fri, 23 Mar 2018 17:14:42 -0500 Subject: [PATCH 3/3] Skip references if sql EXPLAIN passes Currently, there are a lot of factors that determine whether or not a query can be executed without the `.references` call. ActiveRecord itself is even smart enough to figured it out and add it for you if you use the hash `.where` syntax: ContainerImage.includes(:containers => {}) .where(:containers => {:name => "foo"}) .to_sql So as a fail safe, if `skip` is passed to method `Rbac::Filterer#include_references`, then we can do an much cheaper explain to figure out if it is a valid query before deciding if skipping is a bad idea. This means 1 extra query to every RBac call search that is currently "skip-able" by our criteria, but they should hopefully be quick to execute and be a fail safe for what we miss. Note, doing a `scope.explain` will both execute the regular query and the explain, and since we have a lot of heavy Rbac calls, this would not be ideal. We work around this by calling a private API with `.to_sql` to only execute the EXPLAIN query, which still returns an error of `ActiveRecord::StatementInvalid` when it is malformed. In that same vain, a check if we are in a transaction is also required, and setting up a sub transaction SAVEPOINT (via `transaction(:requires_new => true)`)is necessary so we don't pollute an existing transaction in the process. More info on that here: https://stackoverflow.com/a/31146267/3574689 --- lib/rbac/filterer.rb | 28 ++++++++++++- spec/lib/rbac/filterer_spec.rb | 76 ++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/lib/rbac/filterer.rb b/lib/rbac/filterer.rb index 1d5d89dab50..2b3752354ae 100644 --- a/lib/rbac/filterer.rb +++ b/lib/rbac/filterer.rb @@ -307,7 +307,33 @@ def skip_references?(options, attrs) end def include_references(scope, klass, include_for_find, exp_includes, skip) - return scope if skip + if skip + # If we are in a transaction, we don't want to polute that + # transaction with a failed EXPLAIN. We use a SQL SAVEPOINT (which is + # created via `transaction(:requires_new => true)`) to prevent that + # from being an issue (happens in tests with transactional fixtures) + # + # See https://stackoverflow.com/a/31146267/3574689 + valid_skip = MiqDatabase.transaction(:requires_new => true) do + begin + ActiveRecord::Base.connection.explain(scope.to_sql) + rescue ActiveRecord::StatementInvalid => e + unless Rails.env.production? + warn "There was an issue with the Rbac filter without references!" + warn "Consider trying to fix this edge case in Rbac::Filterer! Error Below:" + warn e.message + warn e.backtrace + end + # returns nil + raise ActiveRecord::Rollback + end + end + # If the result of the transaction is non-nil, then the block was + # successful and didn't trigger the ActiveRecord::Rollback, so we can + # return the scope as is. + return scope if valid_skip + end + ref_includes = Hash(include_for_find).merge(Hash(exp_includes)) unless polymorphic_include?(klass, ref_includes) scope = scope.references(include_for_find).references(exp_includes) diff --git a/spec/lib/rbac/filterer_spec.rb b/spec/lib/rbac/filterer_spec.rb index cad0a703231..d4f73040f0d 100644 --- a/spec/lib/rbac/filterer_spec.rb +++ b/spec/lib/rbac/filterer_spec.rb @@ -361,6 +361,17 @@ def combine_filtered_ids(user_filtered_ids, belongsto_filtered_ids, managed_filt expect(subject).to receive(:include_references).with(anything, Vm, {:evm_owner => {}}, nil, true).and_call_original results end + + context "with a references based where_clause" do + let(:search_with_where) { include_search.merge(:where_clause => ['"users"."id" = ?', owner_user.id]) } + let(:results) { subject.search(search_with_where).first } + + it "will try to skip references to begin with" do + expect(subject).to receive(:include_references).with(anything, Vm, {:evm_owner => {}}, nil, true).and_call_original + expect(subject).to receive(:warn).exactly(4).times + results + end + end end end @@ -1969,6 +1980,71 @@ def get_rbac_results_for_and_expect_objects(klass, expected_objects) expect(resulting_scope.references_values).to eq([]) end + + context "when the scope is invalid without .references" do + let(:scope) { klass.where("hosts.name = 'foo'") } + let(:method_args) { [scope, klass, include_for_find, exp_includes, skip] } + let(:resulting_scope) { subject.send(:include_references, *method_args) } + + let(:explain_error_match) do + Regexp.new(Regexp.escape(<<~PG_ERR.chomp)) + PG::UndefinedTable: ERROR: missing FROM-clause entry for table "hosts" + LINE 1: EXPLAIN SELECT "vms".* FROM "vms" WHERE (hosts.name = 'foo') + ^ + : EXPLAIN SELECT "vms".* FROM "vms" WHERE (hosts.name = 'foo') + PG_ERR + end + + it "adds .references to the scope" do + allow(subject).to receive(:warn) + expect(resulting_scope.references_values).to eq(["{:miq_server=>{}}", "{:host=>{}}"]) + end + + it "warns that there was an issue in test mode" do + # This next couple of lines is just used to check that some of the + # backtrace that we are dumping into the logs is what we expect will + # for sure be there, and not try to match the entire trace. + # + # Does a bit of line addition to avoid this being too brittle and + # breaking easily, but expect it to break if you update + # Rbac::Filterer#include_references + method_file, method_line = subject.method(:include_references).source_location + explain_stacktrace_includes = [ + "#{method_file}:#{method_line + 10}:in `block in include_references'", + Thread.current.backtrace[1].gsub(/:\d*:/) { |sub| ":#{sub.tr(":", "").to_i + 7}:" } + ] + + expect(subject).to receive(:warn).with("There was an issue with the Rbac filter without references!").ordered + expect(subject).to receive(:warn).with("Consider trying to fix this edge case in Rbac::Filterer! Error Below:").ordered + expect(subject).to receive(:warn).with(explain_error_match).ordered + expect(subject).to receive(:warn).with(array_including(explain_stacktrace_includes)).ordered + resulting_scope + end + + it "warns that there was an issue in development mode" do + expect(Rails).to receive(:env).and_return(ActiveSupport::StringInquirer.new("developement")) + + # See above + method_file, method_line = subject.method(:include_references).source_location + explain_stacktrace_includes = [ + "#{method_file}:#{method_line + 10}:in `block in include_references'", + Thread.current.backtrace[1].gsub(/:\d*:/) { |sub| ":#{sub.tr(":", "").to_i + 7}:" } + ] + + expect(subject).to receive(:warn).with("There was an issue with the Rbac filter without references!").ordered + expect(subject).to receive(:warn).with("Consider trying to fix this edge case in Rbac::Filterer! Error Below:").ordered + expect(subject).to receive(:warn).with(explain_error_match).ordered + expect(subject).to receive(:warn).with(array_including(explain_stacktrace_includes)).ordered + resulting_scope + end + + it "does not warn that there was an issue in production mode" do + expect(Rails).to receive(:env).and_return(ActiveSupport::StringInquirer.new("production")) + + expect(subject).to receive(:warn).never + resulting_scope + end + end end end