From 4c99c65880e219bf299bf2c7f26d5d19c54d009a Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 23 May 2017 12:58:22 +0200 Subject: [PATCH 1/5] Expose a fixed_foreign_keys list Expose a fixed_foreign_keys list, givin us a way to assert referential integrity. This tool is vital for targeted refresh where we need to be sure, that the referential integrity will not be broken. --- .../manager_refresh/inventory_collection.rb | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection.rb b/app/models/manager_refresh/inventory_collection.rb index d9bd52e429c..652c4e4e277 100644 --- a/app/models/manager_refresh/inventory_collection.rb +++ b/app/models/manager_refresh/inventory_collection.rb @@ -6,7 +6,7 @@ class InventoryCollection :internal_attributes, :delete_method, :data, :data_index, :dependency_attributes, :manager_ref, :association, :complete, :update_only, :transitive_dependency_attributes, :custom_manager_uuid, :custom_db_finder, :check_changed, :arel, :builder_params, :loaded_references, :db_data_index, - :inventory_object_attributes, :name + :inventory_object_attributes, :name, :manager_ref_allowed_nil delegate :each, :size, :to => :to_a @@ -204,7 +204,7 @@ class InventoryCollection # @param custom_db_finder [Proc] A custom way of getting the InventoryCollection out of the DB in a case of any DB # based strategy. This should be used in a case of complex query needed for e.g. targeted refresh or as an # optimization for :custom_manager_uuid. - + # # Example, we solve N+1 issue from Example as well as a selection used for # targeted refresh getting Hardware object from the DB instead of the API: # Having InventoryCollection.new({ @@ -285,11 +285,16 @@ class InventoryCollection # one. # @param name [Symbol] A unique name of the InventoryCollection under a Persister. If not provided, the :association # attribute is used. Providing either :name or :association is mandatory. + # @param manager_ref_allowed_nil [Array] Array of symbols having manager_ref columns, that are a foreign key an can + # be nil. Given the table are shared by many providers, it can happen, that the table is used only partially. + # Then it can happen we want to allow certain foreign keys to be nil, while being sure the referential + # integrity is not broken. Of course the DB Foreign Key can't be created in this case, so we should try to + # avoid this usecase by a proper modeling. def initialize(model_class: nil, manager_ref: nil, association: nil, parent: nil, strategy: nil, saved: nil, custom_save_block: nil, delete_method: nil, data_index: nil, data: nil, dependency_attributes: nil, attributes_blacklist: nil, attributes_whitelist: nil, complete: nil, update_only: nil, check_changed: nil, custom_manager_uuid: nil, custom_db_finder: nil, arel: nil, builder_params: {}, - inventory_object_attributes: nil, unique_index_columns: nil, name: nil) + inventory_object_attributes: nil, unique_index_columns: nil, name: nil, manager_ref_allowed_nil: nil) @model_class = model_class @manager_ref = manager_ref || [:ems_ref] @custom_manager_uuid = custom_manager_uuid @@ -312,6 +317,8 @@ def initialize(model_class: nil, manager_ref: nil, association: nil, parent: nil @unique_index_columns = unique_index_columns @name = name || association + @manager_ref_allowed_nil = manager_ref_allowed_nil || [] + raise "You have to pass either :name or :association argument to .new of #{self}" if @name.blank? @inventory_object_attributes = inventory_object_attributes @@ -684,6 +691,23 @@ def association_to_base_class_mapping end end + def foreign_keys + return [] unless model_class + + @foreign_keys_cache ||= belongs_to_associations.map { |x| x.foreign_key } + end + + def fixed_foreign_keys + # Foreign keys that are part of a manager_ref must be present, otherwise the record would get lost. This is a + # minimum check we can do to not break a referential integrity. + return @fixed_foreign_keys_cache unless @fixed_foreign_keys_cache.nil? + + manager_ref_set = (manager_ref - manager_ref_allowed_nil) + @fixed_foreign_keys_cache = manager_ref_set.map { |x| association_to_foreign_key_mapping[x] }.compact + @fixed_foreign_keys_cache += foreign_keys & manager_ref + @fixed_foreign_keys_cache + end + def base_class_name return "" unless model_class From 6ec42d12f1f538d4a0bed69b8a43fb4e2b9d71bd Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 23 May 2017 14:19:43 +0200 Subject: [PATCH 2/5] Extract distinct relation assertion to a base class Extract distinct relation assertion to a base class --- .../save_collection/saver/base.rb | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index a7472bc9fe5..1f2955b78b6 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -7,6 +7,10 @@ class Base def initialize(inventory_collection) @inventory_collection = inventory_collection + + # Private attrs + @unique_index_keys = inventory_collection.manager_ref_to_cols + @unique_db_primary_keys = Set.new end def save_inventory_collection! @@ -19,6 +23,28 @@ def save_inventory_collection! save!(inventory_collection, association) end + + private + + attr_reader :unique_index_keys, :unique_db_primary_keys + + def assert_distinct_relation(record) + if unique_db_primary_keys.include?(record.id) # Include on Set is O(1) + # Change the InventoryCollection's :association or :arel parameter to return distinct results. The :through + # relations can return the same record multiple times. We don't want to do SELECT DISTINCT by default, since + # it can be very slow. + if Rails.env.production? + _log.warn("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. "\ + " The duplicate value is being ignored.") + return false + else + raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ") + end + else + unique_db_primary_keys << record.id + end + true + end end end end From 4efa87a52873120fe5da6fb14994aa44723ff26c Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 23 May 2017 14:22:13 +0200 Subject: [PATCH 3/5] Extract referential integrity assertion to a base class Extract referential integrity assertion to a base class --- .../manager_refresh/save_collection/saver/base.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index 1f2955b78b6..7d04116c014 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -45,6 +45,18 @@ def assert_distinct_relation(record) end true end + + def assert_referential_integrity(hash, inventory_object) + inventory_object.inventory_collection.fixed_foreign_keys.each do |x| + if hash[x.to_s].blank? + _log.info("Ignoring #{inventory_object} because of missing foreign key #{x} for "\ + "#{inventory_object.inventory_collection.parent.class.name}:"\ + "#{inventory_object.inventory_collection.parent.id}") + return false + end + end + true + end end end end From 0b78eca6033a3c400e4df0bd3e185e25acc9da8f Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 23 May 2017 16:02:08 +0200 Subject: [PATCH 4/5] Use base class assert_distinct_relation Use base class assert_distinct_relation --- .../save_collection/saver/default.rb | 26 ++++++------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/default.rb b/app/models/manager_refresh/save_collection/saver/default.rb index 3788056525f..5e597d0063b 100644 --- a/app/models/manager_refresh/save_collection/saver/default.rb +++ b/app/models/manager_refresh/save_collection/saver/default.rb @@ -14,9 +14,7 @@ def save!(inventory_collection, association) inventory_objects_index[index] = inventory_object end - unique_index_keys = inventory_collection.manager_ref_to_cols unique_db_indexes = Set.new - unique_db_primary_keys = Set.new inventory_collection_size = inventory_collection.size deleted_counter = 0 @@ -25,27 +23,19 @@ def save!(inventory_collection, association) # Records that are in the DB, we will be updating or deleting them. ActiveRecord::Base.transaction do association.find_each do |record| + next unless assert_distinct_relation(record) + index = inventory_collection.object_index_with_keys(unique_index_keys, record) - if unique_db_primary_keys.include?(record.id) # Include on Set is O(1) - # Change the InventoryCollection's :association or :arel parameter to return distinct results. The :through - # relations can return the same record multiple times. We don't want to do SELECT DISTINCT by default, since - # it can be very slow. - if Rails.env.production? - _log.warn("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. "\ - " The duplicate value is being ignored.") - next - else - raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ") - end - elsif unique_db_indexes.include?(index) # Include on Set is O(1) + + # TODO(lsmola) can go away once we indexed our DB with unique indexes + if unique_db_indexes.include?(index) # Include on Set is O(1) # We have a duplicate in the DB, destroy it. A find_each method does automatically .order(:id => :asc) # so we always keep the oldest record in the case of duplicates. - _log.warn("A duplicate record was detected and destroyed, inventory_collection: '#{inventory_collection}', "\ - "record: '#{record}', duplicate_index: '#{index}'") + _log.warn("A duplicate record was detected and destroyed, inventory_collection: "\ + "'#{inventory_collection}', record: '#{record}', duplicate_index: '#{index}'") record.destroy else unique_db_indexes << index - unique_db_primary_keys << record.id end inventory_object = inventory_objects_index.delete(index) @@ -73,7 +63,7 @@ def save!(inventory_collection, association) end end _log.info("*************** PROCESSED #{inventory_collection}, created=#{created_counter}, "\ - "updated=#{inventory_collection_size - created_counter}, deleted=#{deleted_counter} *************") + "updated=#{inventory_collection_size - created_counter}, deleted=#{deleted_counter} *************") end def delete_record!(inventory_collection, record) From bcab13be99b017f81cf2eb1feb07ae0c38c74084 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 23 May 2017 16:03:06 +0200 Subject: [PATCH 5/5] Use base class assert_referential_integrity USe base class assert_referential_integrity --- app/models/manager_refresh/save_collection/saver/default.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/models/manager_refresh/save_collection/saver/default.rb b/app/models/manager_refresh/save_collection/saver/default.rb index 5e597d0063b..3362767e928 100644 --- a/app/models/manager_refresh/save_collection/saver/default.rb +++ b/app/models/manager_refresh/save_collection/saver/default.rb @@ -80,6 +80,8 @@ def update_record!(inventory_collection, record, hash, inventory_object) end def create_record!(inventory_collection, hash, inventory_object) + return unless assert_referential_integrity(hash, inventory_object) + record = inventory_collection.model_class.create!(hash.except(:id)) inventory_object.id = record.id