From 4b4d10605b888756d3044d5e3cb030358df3be28 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 7 Aug 2018 16:29:05 +0200 Subject: [PATCH 01/28] Allow skeletal_index to be serializable Allow skeletal_index to be serializable --- app/models/manageiq/providers/inventory/persister.rb | 5 ++++- app/models/manager_refresh/inventory_collection.rb | 1 + .../manager_refresh/inventory_collection/data_storage.rb | 9 +++++++++ .../inventory_collection/index/type/base.rb | 5 +++++ .../inventory_collection/serialization.rb | 8 +++++++- 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/app/models/manageiq/providers/inventory/persister.rb b/app/models/manageiq/providers/inventory/persister.rb index f2b01248cb5..92f2db107b1 100644 --- a/app/models/manageiq/providers/inventory/persister.rb +++ b/app/models/manageiq/providers/inventory/persister.rb @@ -94,7 +94,10 @@ def initialize_inventory_collections # @return [Hash] entire Persister object serialized to hash def to_hash collections_data = collections.map do |_, collection| - next if collection.data.blank? && collection.targeted_scope.primary_references.blank? && collection.all_manager_uuids.nil? + next if collection.data.blank? && + collection.targeted_scope.primary_references.blank? && + collection.all_manager_uuids.nil? && + collection.skeletal_primary_index.index_data.blank? collection.to_hash end.compact diff --git a/app/models/manager_refresh/inventory_collection.rb b/app/models/manager_refresh/inventory_collection.rb index 5e1620dc601..233573c8571 100644 --- a/app/models/manager_refresh/inventory_collection.rb +++ b/app/models/manager_refresh/inventory_collection.rb @@ -82,6 +82,7 @@ class InventoryCollection delegate :<<, :build, + :build_partial, :data, :each, :find_or_build, diff --git a/app/models/manager_refresh/inventory_collection/data_storage.rb b/app/models/manager_refresh/inventory_collection/data_storage.rb index 14bacf7056c..19d305151db 100644 --- a/app/models/manager_refresh/inventory_collection/data_storage.rb +++ b/app/models/manager_refresh/inventory_collection/data_storage.rb @@ -106,6 +106,15 @@ def build(hash) inventory_object end + # Finds of builds a new InventoryObject with incomplete data. + # + # @param hash [Hash] Hash that needs to contain attributes defined in :manager_ref of the + # InventoryCollection + # @return [ManagerRefresh::InventoryObject] Found or built InventoryObject object + def build_partial(hash) + skeletal_primary_index.build(hash) + end + # Returns array of built InventoryObject objects # # @return [Array] Array of built InventoryObject objects diff --git a/app/models/manager_refresh/inventory_collection/index/type/base.rb b/app/models/manager_refresh/inventory_collection/index/type/base.rb index 9b9f3b659ab..6b1a7438298 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/base.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/base.rb @@ -35,6 +35,11 @@ def reindex! 
end end + # @return [Array] Returns index data + def index_data + index.values + end + # Find value based on index_value # # @param _index_value [String] a index_value of the InventoryObject we search for diff --git a/app/models/manager_refresh/inventory_collection/serialization.rb b/app/models/manager_refresh/inventory_collection/serialization.rb index d3fc99d5df1..7efd77acdf6 100644 --- a/app/models/manager_refresh/inventory_collection/serialization.rb +++ b/app/models/manager_refresh/inventory_collection/serialization.rb @@ -8,6 +8,7 @@ class Serialization :inventory_object_lazy?, :inventory_object?, :name, + :skeletal_primary_index, :to => :inventory_collection attr_reader :inventory_collection @@ -30,6 +31,10 @@ def from_hash(inventory_objects_data, available_inventory_collections) build(hash_to_data(inventory_object_data, available_inventory_collections).symbolize_keys!) end + inventory_objects_data['partial_data'].each do |inventory_object_data| + skeletal_primary_index.build(hash_to_data(inventory_object_data, available_inventory_collections).symbolize_keys!) + end + # TODO(lsmola) add support for all_manager_uuids serialization # self.all_manager_uuids = inventory_objects_data['all_manager_uuids'] end @@ -43,7 +48,8 @@ def to_hash # TODO(lsmola) we do not support nested references here, should we? :manager_uuids => targeted_scope.primary_references.values.map(&:full_reference), :all_manager_uuids => all_manager_uuids, - :data => data.map { |x| data_to_hash(x.data) } + :data => data.map { |x| data_to_hash(x.data) }, + :partial_data => skeletal_primary_index.index_data.map { |x| data_to_hash(x.data) }, } end From 40d10831eecc8dda44e3000c5b38a4c8a3752d4e Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 7 Aug 2018 16:30:48 +0200 Subject: [PATCH 02/28] Use :timestamp for the full row parallel update check Use :timestamp for the full row parallel update check --- .../save_collection/saver/base.rb | 2 +- .../save_collection/saver/sql_helper.rb | 38 +++++++++++-------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index 8965f8c3ba2..be075685016 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -377,7 +377,7 @@ def serializable_keys? # @return [Boolean] true if the model_class has remote_data_timestamp column def supports_remote_data_timestamp?(all_attribute_keys) - all_attribute_keys.include?(:remote_data_timestamp) # include? on Set is O(1) + all_attribute_keys.include?(:timestamp) # include? 
on Set is O(1) end end end diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index 28cba2d664a..ac4b6e9fc08 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -17,6 +17,8 @@ def build_insert_set_cols(key) # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil def build_insert_query(all_attribute_keys, hashes, on_conflict: nil) + _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...") + # Cache the connection for the batch connection = get_connection @@ -48,27 +50,29 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil) UPDATE SET #{all_attribute_keys_array.map { |key| build_insert_set_cols(key) }.join(", ")} } - end - end - - # TODO(lsmola) do we want to exclude the ems_id from the UPDATE clause? Otherwise it might be difficult to change - # the ems_id as a cross manager migration, since ems_id should be there as part of the insert. The attempt of - # changing ems_id could lead to putting it back by a refresh. - # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without a - # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute - # This conditional will avoid rewriting new data by old data. But we want it only when remote_data_timestamp is a - # part of the data, since for the fake records, we just want to update ems_ref. - if supports_remote_data_timestamp?(all_attribute_keys) - insert_query += %{ - WHERE EXCLUDED.remote_data_timestamp IS NULL OR (EXCLUDED.remote_data_timestamp > #{table_name}.remote_data_timestamp) - } + # TODO(lsmola) do we want to exclude the ems_id from the UPDATE clause? Otherwise it might be difficult to change + # the ems_id as a cross manager migration, since ems_id should be there as part of the insert. The attempt of + # changing ems_id could lead to putting it back by a refresh. + # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without a + # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute + + # This conditional will avoid rewriting new data by old data. But we want it only when remote_data_timestamp is a + # part of the data, since for the fake records, we just want to update ems_ref. 
+ if supports_remote_data_timestamp?(all_attribute_keys) + insert_query += %{ + WHERE EXCLUDED.timestamp IS NULL OR (EXCLUDED.timestamp > #{table_name}.timestamp) + } + end + end end insert_query += %{ RETURNING "id",#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")} } + _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...Complete") + insert_query end @@ -97,6 +101,7 @@ def get_connection # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row # @param hashes [Array] data used for building a batch update sql query def build_update_query(all_attribute_keys, hashes) + _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...") # Cache the connection for the batch connection = get_connection @@ -127,9 +132,12 @@ def build_update_query(all_attribute_keys, hashes) # part of the data, since for the fake records, we just want to update ems_ref. if supports_remote_data_timestamp?(all_attribute_keys) update_query += %{ - AND (updated_values.remote_data_timestamp IS NULL OR (updated_values.remote_data_timestamp > #{table_name}.remote_data_timestamp)) + AND (updated_values.timestamp IS NULL OR (updated_values.timestamp > #{table_name}.timestamp)) } end + + _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...Complete") + update_query end From 1fb4b97ef5c6c9a7da1e52bbc4659a020dd6cd8c Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 7 Aug 2018 16:32:02 +0200 Subject: [PATCH 03/28] Allow skeletal_index objects to be updated Allow skeletal_index objects to be updated --- .../index/type/skeletal.rb | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index 34441016ce9..99ea69ac591 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -36,21 +36,30 @@ def delete(index_value) index.delete(index_value) end - # Builds index record with skeletal InventoryObject and returns it, or returns nil if it's already present - # in primary_index or skeletal_primary_index + # Builds index record with skeletal InventoryObject and returns it. Or it returns existing InventoryObject + # that is found in primary_index or skeletal_primary_index. # # @param attributes [Hash] Skeletal data of the index, must contain unique index keys and everything else # needed for creating the record in the Database - # @return [InventoryObject|nil] Returns built value or nil + # @return [InventoryObject] Returns built InventoryObject or existing InventoryObject with new attributes + # assigned def build(attributes) attributes = {}.merge!(default_values).merge!(attributes) # If the primary index is already filled, we don't want populate skeletal index uuid = ::ManagerRefresh::InventoryCollection::Reference.build_stringified_reference(attributes, named_ref) - return if primary_index.find(uuid) + if (inventory_object = primary_index.find(uuid)) + # TODO(lsmola) add timestamp check? 
If timestamps are present, we should assign the data, only if they + # have newer timestamp + return inventory_object.assign_attributes(attributes) + end # Return if skeletal index already exists - return if index[uuid] + if (inventory_object = index[uuid]) + # TODO(lsmola) add timestamp check? If timestamps are present, we should assign the data, only if they + # have newer timestamp + return inventory_object.assign_attributes(attributes) + end # We want to populate a new skeletal index inventory_object = new_inventory_object(attributes) From 1897df2efbff622e931d258829e5e41bafcf9c4b Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Wed, 8 Aug 2018 18:19:14 +0200 Subject: [PATCH 04/28] Expose data iterator into skelatal index Expose data iterator into skelatal index --- .../manager_refresh/inventory_collection/index/type/skeletal.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index 99ea69ac591..92bf2f9f90d 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -18,6 +18,7 @@ def initialize(inventory_collection, index_name, attribute_names, primary_index) delegate :blank?, :each, + :each_value, :to => :index # Find value based on index_value From 1a2415e44f0bb37d5735458a587bf0fb00d8e70e Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Wed, 8 Aug 2018 18:20:43 +0200 Subject: [PATCH 05/28] A basic version of partial updates A basic version of partial updates. Merged with skeletal precreate. The performance is not the best, but it is the base for passing specs. --- .../saver/concurrent_safe_batch.rb | 42 ++++++++++++---- .../save_collection/saver/sql_helper.rb | 48 +++++++++++++++++-- 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index b6c76e196e2..98526f4c6a8 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -129,6 +129,21 @@ def save!(association) skeletal_inventory_objects_index[index] = inventory_object end + base_columns = unique_index_columns + [:created_on, :updated_on, :timestamp, :type] + columns_for_per_column_batches = all_attribute_keys - base_columns + columns_for_per_column_batches.each do |column_name| + filtered = skeletal_inventory_objects_index.select { |_, v| v.data.key?(column_name) } + + filtered.each_slice(batch_size_for_persisting) do |batch| + create_records!(base_columns + [column_name], + batch, + skeletal_attributes_index, + :on_conflict => :do_update, + :mode => :partial, + :column_name => column_name) + end + end + skeletal_inventory_objects_index.each_slice(batch_size_for_persisting) do |batch| create_records!(all_attribute_keys, batch, skeletal_attributes_index, :on_conflict => :do_nothing) end @@ -293,20 +308,22 @@ def update_records!(all_attribute_keys, hashes) # models's table # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil - def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil) + # @param on_conflict [Symbol] Mode for saving, allowed values are [:full, :partial], :full is when we save all + 
# columns of a row, :partial is when we save only few columns, so a partial row. + def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil, mode: :full, column_name: nil) indexed_inventory_objects = {} hashes = [] create_time = time_now batch.each do |index, inventory_object| hash = if inventory_collection.use_ar_object? - record = inventory_collection.model_class.new(attributes_index.delete(index)) + record = inventory_collection.model_class.new(attributes_index[index]) values_for_database!(all_attribute_keys, record.attributes.symbolize_keys) elsif serializable_keys? values_for_database!(all_attribute_keys, - attributes_index.delete(index)) + attributes_index[index]) else - attributes_index.delete(index) + attributes_index[index] end assign_attributes_for_create!(hash, create_time) @@ -321,12 +338,19 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni return if hashes.blank? result = get_connection.execute( - build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict) + build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => mode, :column_name => column_name) ) + # TODO(lsmola) we need to recognize what records were created and what records were updated. Will we take + # skeletal records as created? The creation should be having :complete => true inventory_collection.store_created_records(result) - if inventory_collection.dependees.present? + if inventory_collection.dependees.present? && mode == :full # We need to get primary keys of the created objects, but only if there are dependees that would use them - map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, :on_conflict => on_conflict) + map_ids_to_inventory_objects(indexed_inventory_objects, + all_attribute_keys, + hashes, + result, + :on_conflict => on_conflict, + :mode => mode) end end @@ -340,8 +364,10 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni # contains a primary key_value plus all columns that are a part of the unique index # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil - def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:) + def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:, mode:) if on_conflict == :do_nothing + # TODO(lsmola) is the comment below still accurate? We will update some partial rows, the actual skeletal + # precreate will still do nothing. # For ON CONFLICT DO NOTHING, we need to always fetch the records plus the attribute_references. This path # applies only for skeletal precreate. 
inventory_collection.model_class.where( diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index ac4b6e9fc08..f77ddbc8916 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -16,12 +16,18 @@ def build_insert_set_cols(key) # @param hashes [Array] data used for building a batch insert sql query # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil - def build_insert_query(all_attribute_keys, hashes, on_conflict: nil) + def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, column_name: nil) _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...") # Cache the connection for the batch connection = get_connection + ignore_cols = if mode == :partial + [:timestamp] + elsif mode == :full + [] + end + # Make sure we don't send a primary_key for INSERT in any form, it could break PG sequencer all_attribute_keys_array = all_attribute_keys.to_a - [primary_key.to_s, primary_key.to_sym] values = hashes.map do |hash| @@ -48,7 +54,6 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil) ON CONFLICT (#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}) #{where_to_sql} DO UPDATE - SET #{all_attribute_keys_array.map { |key| build_insert_set_cols(key) }.join(", ")} } # TODO(lsmola) do we want to exclude the ems_id from the UPDATE clause? Otherwise it might be difficult to change @@ -59,10 +64,40 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil) # This conditional will avoid rewriting new data by old data. But we want it only when remote_data_timestamp is a # part of the data, since for the fake records, we just want to update ems_ref. - if supports_remote_data_timestamp?(all_attribute_keys) + if mode == :full + insert_query += %{ + SET #{all_attribute_keys_array.map { |key| build_insert_set_cols(key) }.join(", ")} + } + if supports_remote_data_timestamp?(all_attribute_keys) + insert_query += %{ + WHERE EXCLUDED.timestamp IS NULL OR ( + EXCLUDED.timestamp > #{table_name}.timestamp AND + EXCLUDED.timestamp >= #{table_name}.timestamps_max + ) + } + end + elsif mode == :partial insert_query += %{ - WHERE EXCLUDED.timestamp IS NULL OR (EXCLUDED.timestamp > #{table_name}.timestamp) + SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} } + if supports_remote_data_timestamp?(all_attribute_keys) + # TODO(lsmola) we should have EXCLUDED.timestamp > #{table_name}.timestamp, but if skeletal precreate + # creates the row, it sets timestamp. Should we combine it with complete => true only? We probably need + # to set the timestamp, otherwise we can't touch it in the update clause. Maybe we coud set it as + # timestamps_max? 
+ + + insert_query += %{ + , timestamps = #{table_name}.timestamps || ('{"#{column_name}": "' || EXCLUDED.timestamp::timestamp || '"}')::jsonb + , timestamps_max = greatest(#{table_name}.timestamps_max::timestamp, EXCLUDED.timestamp::timestamp) + WHERE EXCLUDED.timestamp IS NULL OR ( + EXCLUDED.timestamp >= #{table_name}.timestamp AND ( + (#{table_name}.timestamps->>'#{column_name}')::timestamp IS NULL OR + EXCLUDED.timestamp > (#{table_name}.timestamps->>'#{column_name}')::timestamp + ) + ) + } + end end end end @@ -132,7 +167,10 @@ def build_update_query(all_attribute_keys, hashes) # part of the data, since for the fake records, we just want to update ems_ref. if supports_remote_data_timestamp?(all_attribute_keys) update_query += %{ - AND (updated_values.timestamp IS NULL OR (updated_values.timestamp > #{table_name}.timestamp)) + AND (updated_values.timestamp IS NULL OR ( + updated_values.timestamp > #{table_name}.timestamp) AND + updated_values.timestamp >= #{table_name}.timestamps_max + ) } end From 5afa339b3e8daa589781506692d16bebcf19dc54 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 9 Aug 2018 12:02:45 +0200 Subject: [PATCH 06/28] Saving partial after full plus specs passing Saving partial after full plus specs passing --- .../save_collection/saver/base.rb | 33 ++++- .../saver/concurrent_safe_batch.rb | 130 +++++++++++++----- .../save_collection/saver/sql_helper.rb | 28 ++-- 3 files changed, 143 insertions(+), 48 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index be075685016..831e8085a96 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -51,6 +51,7 @@ def initialize(inventory_collection) @deserializable_keys[key.to_sym] = attribute_type elsif attribute_type.respond_to?(:coder) || attribute_type.type == :int4range || + attribute_type.type == :jsonb || pg_type == "text[]" || pg_type == "character varying[]" # Identify columns that needs to be encoded by type.serialize(value), it's a costy operations so lets do @@ -97,6 +98,19 @@ def values_for_database!(all_attribute_keys, attributes) attributes end + def transform_to_hash!(all_attribute_keys, hash) + if inventory_collection.use_ar_object? + record = inventory_collection.model_class.new(hash) + values_for_database!(all_attribute_keys, + record.attributes.symbolize_keys) + elsif serializable_keys? + values_for_database!(all_attribute_keys, + hash) + else + hash + end + end + private attr_reader :unique_index_keys, :unique_index_keys_to_s, :select_keys, :unique_db_primary_keys, :unique_db_indexes, @@ -290,6 +304,18 @@ def assign_attributes_for_create!(hash, create_time) assign_attributes_for_update!(hash, create_time) end + def internal_columns + return @internal_columns if @internal_columns + + @internal_columns = [] + @internal_columns << :type if supports_sti? + @internal_columns << :created_on if supports_created_on? + @internal_columns << :created_at if supports_created_at? + @internal_columns << :updated_on if supports_updated_on? + @internal_columns << :updated_at if supports_updated_at? + @internal_columns + end + # @return [Array] array of all unique indexes known to model def unique_indexes @unique_indexes_cache if @unique_indexes_cache @@ -375,10 +401,15 @@ def serializable_keys? @serializable_keys_bool_cache ||= serializable_keys.present? 
end - # @return [Boolean] true if the model_class has remote_data_timestamp column + # @return [Boolean] true if the model_class has timestamp column def supports_remote_data_timestamp?(all_attribute_keys) all_attribute_keys.include?(:timestamp) # include? on Set is O(1) end + + # @return [Boolean] true if the model_class has remote_data_timestamp column + def supports_max_timestamp? + @supports_max_timestamp_cache ||= model_class.column_names.include?("timestamps_max") + end end end end diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 98526f4c6a8..2d368de052f 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -117,36 +117,7 @@ def save!(association) attributes_index = nil if inventory_collection.parallel_safe? - # We will create also remaining skeletal records - skeletal_attributes_index = {} - skeletal_inventory_objects_index = {} - - inventory_collection.skeletal_primary_index.each_value do |inventory_object| - attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys) - index = build_stringified_reference(attributes, unique_index_keys) - - skeletal_attributes_index[index] = attributes - skeletal_inventory_objects_index[index] = inventory_object - end - - base_columns = unique_index_columns + [:created_on, :updated_on, :timestamp, :type] - columns_for_per_column_batches = all_attribute_keys - base_columns - columns_for_per_column_batches.each do |column_name| - filtered = skeletal_inventory_objects_index.select { |_, v| v.data.key?(column_name) } - - filtered.each_slice(batch_size_for_persisting) do |batch| - create_records!(base_columns + [column_name], - batch, - skeletal_attributes_index, - :on_conflict => :do_update, - :mode => :partial, - :column_name => column_name) - end - end - - skeletal_inventory_objects_index.each_slice(batch_size_for_persisting) do |batch| - create_records!(all_attribute_keys, batch, skeletal_attributes_index, :on_conflict => :do_nothing) - end + create_or_update_partial_records(all_attribute_keys) end end _log.debug("Processing #{inventory_collection}, "\ @@ -298,6 +269,92 @@ def update_records!(all_attribute_keys, hashes) get_connection.execute(query) end + + def create_or_update_partial_records(all_attribute_keys) + # We will create also remaining skeletal records + skeletal_attributes_index = {} + skeletal_inventory_objects_index = {} + + inventory_collection.skeletal_primary_index.each_value do |inventory_object| + attributes = inventory_object.attributes_with_keys(inventory_collection, all_attribute_keys) + index = build_stringified_reference(attributes, unique_index_keys) + + skeletal_attributes_index[index] = attributes + skeletal_inventory_objects_index[index] = inventory_object + end + + + base_columns = unique_index_columns + internal_columns + [:timestamps_max] + columns_for_per_column_batches = all_attribute_keys - base_columns + + # TODO(lsmola) needs to have supports timestamp or resource version + all_attribute_keys << :timestamps + all_attribute_keys << :timestamps_max + + indexed_inventory_objects = {} + hashes = [] + create_time = time_now + + skeletal_inventory_objects_index.each do |index, inventory_object| + hash = skeletal_attributes_index.delete(index) + # Partial create or update nevers sets a timestamp for the whole row + hash[:timestamps_max] = 
hash.delete(:timestamp) + # TODO(lsmola) have to filter this by keys, so we set timestamp only of cols that were passed, add spec + hash[:timestamps] = columns_for_per_column_batches.map { |x| [x, hash[:timestamps_max]] }.to_h + # Transform hash to DB format + hash = transform_to_hash!(all_attribute_keys, hash) + + assign_attributes_for_create!(hash, create_time) + + next unless assert_referential_integrity(hash) + + hashes << hash + # Index on Unique Columns values, so we can easily fill in the :id later + indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object + end + + return if hashes.blank? + + # First, lets try to create all partial records + hashes.each_slice(batch_size_for_persisting) do |batch| + create_partial!(all_attribute_keys, + batch, + :on_conflict => :do_nothing) + end + + # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing + (columns_for_per_column_batches - [:timestamp]).each do |column_name| + # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the key? is not correct + # and we don't want to do the map_ids + filtered = hashes.select { |x| x.key?(column_name) } + + filtered.each_slice(batch_size_for_persisting) do |batch| + create_partial!(base_columns + [column_name], + batch, + :on_conflict => :do_update, + :column_name => column_name) + end + end + + # TODO(lsmola) we need to recognize what records were created and what records were updated. Will we take + # skeletal records as created? The creation should be having :complete => true + # inventory_collection.store_created_records(result) + if inventory_collection.dependees.present? + # We need to get primary keys of the created objects, but only if there are dependees that would use them + map_ids_to_inventory_objects(indexed_inventory_objects, + all_attribute_keys, + hashes, + nil, + :on_conflict => :do_nothing) + end + end + + def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil) + result = get_connection.execute( + build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name) + ) + end + # Batch inserts records using attributes_index data. With on_conflict option using :do_update, this method # does atomic upsert. # @@ -308,9 +365,7 @@ def update_records!(all_attribute_keys, hashes) # models's table # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil - # @param on_conflict [Symbol] Mode for saving, allowed values are [:full, :partial], :full is when we save all - # columns of a row, :partial is when we save only few columns, so a partial row. - def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil, mode: :full, column_name: nil) + def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil, column_name: nil) indexed_inventory_objects = {} hashes = [] create_time = time_now @@ -338,19 +393,18 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni return if hashes.blank? result = get_connection.execute( - build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => mode, :column_name => column_name) + build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full, :column_name => column_name) ) # TODO(lsmola) we need to recognize what records were created and what records were updated. 
Will we take # skeletal records as created? The creation should be having :complete => true inventory_collection.store_created_records(result) - if inventory_collection.dependees.present? && mode == :full + if inventory_collection.dependees.present? # We need to get primary keys of the created objects, but only if there are dependees that would use them map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, - :on_conflict => on_conflict, - :mode => mode) + :on_conflict => on_conflict) end end @@ -364,7 +418,7 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni # contains a primary key_value plus all columns that are a part of the unique index # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil - def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:, mode:) + def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, hashes, result, on_conflict:) if on_conflict == :do_nothing # TODO(lsmola) is the comment below still accurate? We will update some partial rows, the actual skeletal # precreate will still do nothing. diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index f77ddbc8916..fc59e67bd09 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -14,6 +14,8 @@ def build_insert_set_cols(key) # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row # @param hashes [Array] data used for building a batch insert sql query + # @param mode [Symbol] Mode for saving, allowed values are [:full, :partial], :full is when we save all + # columns of a row, :partial is when we save only few columns, so a partial row. # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, column_name: nil) @@ -29,7 +31,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu end # Make sure we don't send a primary_key for INSERT in any form, it could break PG sequencer - all_attribute_keys_array = all_attribute_keys.to_a - [primary_key.to_s, primary_key.to_sym] + all_attribute_keys_array = all_attribute_keys.to_a - [primary_key.to_s, primary_key.to_sym] - ignore_cols values = hashes.map do |hash| "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x) }.join(",")})" end.join(",") @@ -56,6 +58,13 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu UPDATE } + ignore_cols += if mode == :partial + [:timestamps, :timestamps_max] + elsif mode == :full + [] + end + ignore_cols += [:created_on, :created_at] # Lets not change created_at for the update clause + # TODO(lsmola) do we want to exclude the ems_id from the UPDATE clause? Otherwise it might be difficult to change # the ems_id as a cross manager migration, since ems_id should be there as part of the insert. The attempt of # changing ems_id could lead to putting it back by a refresh. @@ -66,7 +75,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu # part of the data, since for the fake records, we just want to update ems_ref. 
if mode == :full insert_query += %{ - SET #{all_attribute_keys_array.map { |key| build_insert_set_cols(key) }.join(", ")} + SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} } if supports_remote_data_timestamp?(all_attribute_keys) insert_query += %{ @@ -77,23 +86,24 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu } end elsif mode == :partial + raise "Column name not defined for #{hashes}" unless column_name + insert_query += %{ SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} } - if supports_remote_data_timestamp?(all_attribute_keys) + if supports_max_timestamp? # TODO(lsmola) we should have EXCLUDED.timestamp > #{table_name}.timestamp, but if skeletal precreate # creates the row, it sets timestamp. Should we combine it with complete => true only? We probably need # to set the timestamp, otherwise we can't touch it in the update clause. Maybe we coud set it as # timestamps_max? - insert_query += %{ - , timestamps = #{table_name}.timestamps || ('{"#{column_name}": "' || EXCLUDED.timestamp::timestamp || '"}')::jsonb - , timestamps_max = greatest(#{table_name}.timestamps_max::timestamp, EXCLUDED.timestamp::timestamp) - WHERE EXCLUDED.timestamp IS NULL OR ( - EXCLUDED.timestamp >= #{table_name}.timestamp AND ( + , timestamps = #{table_name}.timestamps || ('{"#{column_name}": "' || EXCLUDED.timestamps_max::timestamp || '"}')::jsonb + , timestamps_max = greatest(#{table_name}.timestamps_max::timestamp, EXCLUDED.timestamps_max::timestamp) + WHERE EXCLUDED.timestamps_max IS NULL OR ( + EXCLUDED.timestamps_max > #{table_name}.timestamp AND ( (#{table_name}.timestamps->>'#{column_name}')::timestamp IS NULL OR - EXCLUDED.timestamp > (#{table_name}.timestamps->>'#{column_name}')::timestamp + EXCLUDED.timestamps_max::timestamp > (#{table_name}.timestamps->>'#{column_name}')::timestamp ) ) } From d4754fe5b6abfa2b5e7d990c44ebe6ac4d773f6b Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 9 Aug 2018 13:32:56 +0200 Subject: [PATCH 07/28] Timestamps are set correctly when combining different partials Timestamps are set correctly when combining different partials --- .../save_collection/saver/concurrent_safe_batch.rb | 3 +-- .../manager_refresh/save_collection/saver/sql_helper.rb | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 2d368de052f..dd38e92b1ce 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -299,8 +299,7 @@ def create_or_update_partial_records(all_attribute_keys) hash = skeletal_attributes_index.delete(index) # Partial create or update nevers sets a timestamp for the whole row hash[:timestamps_max] = hash.delete(:timestamp) - # TODO(lsmola) have to filter this by keys, so we set timestamp only of cols that were passed, add spec - hash[:timestamps] = columns_for_per_column_batches.map { |x| [x, hash[:timestamps_max]] }.to_h + hash[:timestamps] = columns_for_per_column_batches.map { |x| [x, hash[:timestamps_max]] if hash.key?(x) }.compact.to_h # Transform hash to DB format hash = transform_to_hash!(all_attribute_keys, hash) diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb 
index fc59e67bd09..4df3d61ed80 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -80,8 +80,8 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu if supports_remote_data_timestamp?(all_attribute_keys) insert_query += %{ WHERE EXCLUDED.timestamp IS NULL OR ( - EXCLUDED.timestamp > #{table_name}.timestamp AND - EXCLUDED.timestamp >= #{table_name}.timestamps_max + (#{table_name}.timestamp IS NULL OR EXCLUDED.timestamp > #{table_name}.timestamp) AND + (#{table_name}.timestamps_max IS NULL OR EXCLUDED.timestamp >= #{table_name}.timestamps_max) ) } end @@ -101,7 +101,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu , timestamps = #{table_name}.timestamps || ('{"#{column_name}": "' || EXCLUDED.timestamps_max::timestamp || '"}')::jsonb , timestamps_max = greatest(#{table_name}.timestamps_max::timestamp, EXCLUDED.timestamps_max::timestamp) WHERE EXCLUDED.timestamps_max IS NULL OR ( - EXCLUDED.timestamps_max > #{table_name}.timestamp AND ( + (#{table_name}.timestamp IS NULL OR EXCLUDED.timestamps_max > #{table_name}.timestamp) AND ( (#{table_name}.timestamps->>'#{column_name}')::timestamp IS NULL OR EXCLUDED.timestamps_max::timestamp > (#{table_name}.timestamps->>'#{column_name}')::timestamp ) From 3ad919213cb413b9ac94801c17155e68050e15ff Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 9 Aug 2018 19:50:55 +0200 Subject: [PATCH 08/28] Correct update for full after partial Correct update for full after partial with nullifying of the partial update timestamps --- .../saver/concurrent_safe_batch.rb | 4 ++-- .../save_collection/saver/sql_helper.rb | 20 ++++++++++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index dd38e92b1ce..b0f739828c0 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -364,7 +364,7 @@ def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: n # models's table # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values # are :do_update, :do_nothing, nil - def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil, column_name: nil) + def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: nil) indexed_inventory_objects = {} hashes = [] create_time = time_now @@ -392,7 +392,7 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni return if hashes.blank? result = get_connection.execute( - build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full, :column_name => column_name) + build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full) ) # TODO(lsmola) we need to recognize what records were created and what records were updated. Will we take # skeletal records as created? 
The creation should be having :complete => true diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index 4df3d61ed80..4c916feb154 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -79,6 +79,8 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu } if supports_remote_data_timestamp?(all_attribute_keys) insert_query += %{ + , timestamps = '{}', timestamps_max = NULL + WHERE EXCLUDED.timestamp IS NULL OR ( (#{table_name}.timestamp IS NULL OR EXCLUDED.timestamp > #{table_name}.timestamp) AND (#{table_name}.timestamps_max IS NULL OR EXCLUDED.timestamp >= #{table_name}.timestamps_max) @@ -162,6 +164,16 @@ def build_update_query(all_attribute_keys, hashes) UPDATE #{table_name} SET #{all_attribute_keys_array.map { |key| build_update_set_cols(key) }.join(",")} + } + + if supports_remote_data_timestamp?(all_attribute_keys) + # Full row update will reset the partial update timestamps + update_query += %{ + , timestamps = '{}', timestamps_max = NULL + } + end + + update_query += %{ FROM ( VALUES #{values} @@ -177,9 +189,11 @@ def build_update_query(all_attribute_keys, hashes) # part of the data, since for the fake records, we just want to update ems_ref. if supports_remote_data_timestamp?(all_attribute_keys) update_query += %{ - AND (updated_values.timestamp IS NULL OR ( - updated_values.timestamp > #{table_name}.timestamp) AND - updated_values.timestamp >= #{table_name}.timestamps_max + AND ( + updated_values.timestamp IS NULL OR ( + (#{table_name}.timestamp IS NULL OR updated_values.timestamp > #{table_name}.timestamp) AND + (#{table_name}.timestamps_max IS NULL OR updated_values.timestamp >= #{table_name}.timestamps_max) + ) ) } end From 6792e79cfc2c65e6886a9248a1486f598de34904 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 9 Aug 2018 20:12:58 +0200 Subject: [PATCH 09/28] Use supports_timestamps_max for conditionalization of the entire path Use supports_timestamps_max for conditionalization of the entire path --- .../manager_refresh/save_collection/saver/base.rb | 4 ++-- .../saver/concurrent_safe_batch.rb | 15 +++++++++------ .../save_collection/saver/sql_helper.rb | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index 831e8085a96..6eb6e93ebbf 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -407,8 +407,8 @@ def supports_remote_data_timestamp?(all_attribute_keys) end # @return [Boolean] true if the model_class has remote_data_timestamp column - def supports_max_timestamp? - @supports_max_timestamp_cache ||= model_class.column_names.include?("timestamps_max") + def supports_timestamps_max? 
+ @supports_timestamps_max_cache ||= model_class.column_names.include?("timestamps_max") end end end diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index b0f739828c0..d66985cc857 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -287,9 +287,10 @@ def create_or_update_partial_records(all_attribute_keys) base_columns = unique_index_columns + internal_columns + [:timestamps_max] columns_for_per_column_batches = all_attribute_keys - base_columns - # TODO(lsmola) needs to have supports timestamp or resource version - all_attribute_keys << :timestamps - all_attribute_keys << :timestamps_max + if supports_timestamps_max? + all_attribute_keys << :timestamps + all_attribute_keys << :timestamps_max + end indexed_inventory_objects = {} hashes = [] @@ -297,9 +298,11 @@ def create_or_update_partial_records(all_attribute_keys) skeletal_inventory_objects_index.each do |index, inventory_object| hash = skeletal_attributes_index.delete(index) - # Partial create or update nevers sets a timestamp for the whole row - hash[:timestamps_max] = hash.delete(:timestamp) - hash[:timestamps] = columns_for_per_column_batches.map { |x| [x, hash[:timestamps_max]] if hash.key?(x) }.compact.to_h + # Partial create or update must never set a timestamp for the whole row + if supports_timestamps_max? + hash[:timestamps_max] = hash.delete(:timestamp) + hash[:timestamps] = columns_for_per_column_batches.map { |x| [x, hash[:timestamps_max]] if hash.key?(x) }.compact.to_h + end # Transform hash to DB format hash = transform_to_hash!(all_attribute_keys, hash) diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index 4c916feb154..ab35b79001b 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -93,7 +93,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu insert_query += %{ SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} } - if supports_max_timestamp? + if supports_timestamps_max? # TODO(lsmola) we should have EXCLUDED.timestamp > #{table_name}.timestamp, but if skeletal precreate # creates the row, it sets timestamp. Should we combine it with complete => true only? We probably need # to set the timestamp, otherwise we can't touch it in the update clause. Maybe we coud set it as From 2f40ea30fa3db78c009ae13de913bfccebd143c3 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 9 Aug 2018 22:26:37 +0200 Subject: [PATCH 10/28] Skeletonize full row if it fails timestamp check Skeletonize full row if it fails timestamp check, so it is able to save partial row that is still valid. 
--- .../inventory_collection/index/type/data.rb | 8 ++ .../index/type/skeletal.rb | 9 ++ .../saver/concurrent_safe_batch.rb | 88 +++++++++++++------ .../save_collection/saver/sql_helper.rb | 6 ++ 4 files changed, 86 insertions(+), 25 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection/index/type/data.rb b/app/models/manager_refresh/inventory_collection/index/type/data.rb index af5c7505483..2b598bb25b8 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/data.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/data.rb @@ -9,6 +9,14 @@ class Data < ManagerRefresh::InventoryCollection::Index::Type::Base def find(index_value) index[index_value] end + + # Deletes and returns the value on the index_value + # + # @param index_value [String] a index_value of the InventoryObject we search for + # @return [InventoryObject|nil] Returns found value or nil + def delete(index_value) + index.delete(index_value) + end end end end diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index 92bf2f9f90d..f10872978de 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -37,6 +37,15 @@ def delete(index_value) index.delete(index_value) end + # Takes value from primary_index and puts it to skeletal index + # @param index_value [String] a index_value of the InventoryObject we search for + # @return [InventoryObject|nil] Returns found value or nil + def skeletonize_primary_index(index_value) + inventory_object = primary_index.delete(index_value) + return unless inventory_object + build(inventory_object.data) + end + # Builds index record with skeletal InventoryObject and returns it. Or it returns existing InventoryObject # that is found in primary_index or skeletal_primary_index. # diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index d66985cc857..1e7b0d7dd18 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -139,8 +139,9 @@ def save!(association) # models's table # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys) - hashes_for_update = [] + hashes_for_update = {} records_for_destroy = [] + db_index_to_inventory_object_mapping = {} records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch| update_time = time_now @@ -150,22 +151,7 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, next unless assert_distinct_relation(primary_key_value) - # Incoming values are in SQL string form. - # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection - # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh - # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only - index = unique_index_keys_to_s.map do |attribute| - value = record_key(record, attribute) - if attribute == "timestamp" - # TODO: can this be covered by @deserializable_keys? 
- type = model_class.type_for_attribute(attribute) - type.cast(value).utc.iso8601.to_s - elsif (type = deserializable_keys[attribute.to_sym]) - type.deserialize(value).to_s - else - value.to_s - end - end.join("__") + index = db_columns_index(record) inventory_object = inventory_objects_index.delete(index) hash = attributes_index.delete(index) @@ -198,15 +184,17 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, assign_attributes_for_update!(hash_for_update, update_time) hash_for_update[:id] = primary_key_value - hashes_for_update << hash_for_update + db_index_to_inventory_object_mapping[index] = inventory_object.manager_uuid + hashes_for_update[index] = hash_for_update end end # Update in batches if hashes_for_update.size >= batch_size_for_persisting - update_records!(all_attribute_keys, hashes_for_update) + update_records!(all_attribute_keys, hashes_for_update, db_index_to_inventory_object_mapping) - hashes_for_update = [] + hashes_for_update = {} + db_index_to_inventory_object_mapping = {} end # Destroy in batches @@ -217,7 +205,7 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, end # Update the last batch - update_records!(all_attribute_keys, hashes_for_update) + update_records!(all_attribute_keys, hashes_for_update, db_index_to_inventory_object_mapping) hashes_for_update = [] # Cleanup so GC can release it sooner # Destroy the last batch @@ -225,6 +213,37 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, records_for_destroy = [] # Cleanup so GC can release it sooner end + def db_columns_index(record, pure_sql: false) + # Incoming values are in SQL string form. + # TODO(lsmola) unify this behavior with object_index_with_keys method in InventoryCollection + # TODO(lsmola) maybe we can drop the whole pure sql fetching, since everything will be targeted refresh + # with streaming refresh? Maybe just metrics and events will not be, but those should be upsert only + # TODO(lsmola) taking ^ in account, we can't drop pure sql, since that is returned by batch insert and + # update queries + unique_index_keys_to_s.map do |attribute| + value = if pure_sql + record[attribute] + else + record_key(record, attribute) + end + + format_value(attribute, value) + end.join("__") + end + + def format_value(attribute, value) + if attribute == "timestamp" + # TODO: can this be covered by @deserializable_keys? + type = model_class.type_for_attribute(attribute) + type.cast(value).utc.iso8601.to_s + elsif (type = deserializable_keys[attribute.to_sym]) + type.deserialize(value).to_s + else + value.to_s + end + end + + # Deletes or sof-deletes records. If the model_class supports a custom class delete method, we will use it for # batch soft-delete. # @@ -262,11 +281,30 @@ def destroy_records!(records) # # @param hashes [Array] data used for building a batch update sql query # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row - def update_records!(all_attribute_keys, hashes) + def update_records!(all_attribute_keys, hashes, db_index_to_inventory_object_mapping) return if hashes.blank? - inventory_collection.store_updated_records(hashes) - query = build_update_query(all_attribute_keys, hashes) - get_connection.execute(query) + data = hashes.values + query = build_update_query(all_attribute_keys, data) + result = get_connection.execute(query) + + if inventory_collection.parallel_safe? 
+ # We will check for timestamp clashes of full row update and we will fallback to skeletal update + inventory_collection.store_updated_records(result) + + updated = result.map { |x| db_columns_index(x, :pure_sql => true) } + updated.each { |x| hashes.delete(x) } + + # Now lets skeletonize all inventory_objects that were not saved + # TODO(lsmola) we should compare timestamps and ignore InventoryObjects or attributes that are old, to lower + # what we send as upsert in skeletal index + hashes.keys.each do |db_index| + inventory_collection.skeletal_primary_index.skeletonize_primary_index(db_index_to_inventory_object_mapping[db_index]) + end + else + inventory_collection.store_updated_records(data) + end + + result end diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index ab35b79001b..237a18829a2 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -198,6 +198,12 @@ def build_update_query(all_attribute_keys, hashes) } end + if inventory_collection.parallel_safe? + update_query += %{ + RETURNING updated_values.#{quote_column_name("id")},#{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")} + } + end + _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...Complete") update_query From f371709e26dca2a48ab0a52e81cb847cf924991a Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 08:50:18 +0200 Subject: [PATCH 11/28] Make full to partial transformation also for the upsert path Make full to partial transformation also for the upsert path. Plus the skeletonize must preserve the inventory_object --- .../index/type/skeletal.rb | 2 +- .../save_collection/saver/base.rb | 7 +++ .../saver/concurrent_safe_batch.rb | 56 +++++++++++-------- .../save_collection/saver/sql_helper.rb | 2 +- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index f10872978de..316a4f6b08e 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -43,7 +43,7 @@ def delete(index_value) def skeletonize_primary_index(index_value) inventory_object = primary_index.delete(index_value) return unless inventory_object - build(inventory_object.data) + index[index_value] = inventory_object end # Builds index record with skeletal InventoryObject and returns it. Or it returns existing InventoryObject diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index 6eb6e93ebbf..2efdd0df5f6 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -358,6 +358,13 @@ def unique_index_columns @unique_index_columns = unique_index_for(unique_index_keys).columns.map(&:to_sym) end + # @return [Array] all columns that are part of the best fit unique index + def unique_index_columns_to_s + return @unique_index_columns_to_s if @unique_index_columns_to_s + + @unique_index_columns_to_s = unique_index_columns.map(&:to_s) + end + # @return [Boolean] true if the model_class supports STI def supports_sti? 
@supports_sti_cache = model_class.column_names.include?("type") if @supports_sti_cache.nil? diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 1e7b0d7dd18..99263b0ff29 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -139,9 +139,9 @@ def save!(association) # models's table # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, attributes_index, all_attribute_keys) - hashes_for_update = {} + hashes_for_update = [] records_for_destroy = [] - db_index_to_inventory_object_mapping = {} + indexed_inventory_objects = {} records_batch_iterator.find_in_batches(:batch_size => batch_size) do |batch| update_time = time_now @@ -184,17 +184,17 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, assign_attributes_for_update!(hash_for_update, update_time) hash_for_update[:id] = primary_key_value - db_index_to_inventory_object_mapping[index] = inventory_object.manager_uuid - hashes_for_update[index] = hash_for_update + indexed_inventory_objects[index] = inventory_object + hashes_for_update << hash_for_update end end # Update in batches if hashes_for_update.size >= batch_size_for_persisting - update_records!(all_attribute_keys, hashes_for_update, db_index_to_inventory_object_mapping) + update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects) - hashes_for_update = {} - db_index_to_inventory_object_mapping = {} + hashes_for_update = [] + indexed_inventory_objects = {} end # Destroy in batches @@ -205,7 +205,7 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, end # Update the last batch - update_records!(all_attribute_keys, hashes_for_update, db_index_to_inventory_object_mapping) + update_records!(all_attribute_keys, hashes_for_update, indexed_inventory_objects) hashes_for_update = [] # Cleanup so GC can release it sooner # Destroy the last batch @@ -281,32 +281,40 @@ def destroy_records!(records) # # @param hashes [Array] data used for building a batch update sql query # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row - def update_records!(all_attribute_keys, hashes, db_index_to_inventory_object_mapping) + def update_records!(all_attribute_keys, hashes, indexed_inventory_objects) return if hashes.blank? - data = hashes.values - query = build_update_query(all_attribute_keys, data) + query = build_update_query(all_attribute_keys, hashes) result = get_connection.execute(query) if inventory_collection.parallel_safe? 
# We will check for timestamp clashes of full row update and we will fallback to skeletal update inventory_collection.store_updated_records(result) - updated = result.map { |x| db_columns_index(x, :pure_sql => true) } - updated.each { |x| hashes.delete(x) } - - # Now lets skeletonize all inventory_objects that were not saved - # TODO(lsmola) we should compare timestamps and ignore InventoryObjects or attributes that are old, to lower - # what we send as upsert in skeletal index - hashes.keys.each do |db_index| - inventory_collection.skeletal_primary_index.skeletonize_primary_index(db_index_to_inventory_object_mapping[db_index]) - end + skeletonize_ignored_records!(indexed_inventory_objects, result) else - inventory_collection.store_updated_records(data) + inventory_collection.store_updated_records(hashes) end result end + def skeletonize_ignored_records!(hash, result, all_unique_columns: false) + updated = if all_unique_columns + result.map { |x| unique_index_columns_to_s.map { |key| x[key] } } + else + result.map { |x| db_columns_index(x, :pure_sql => true) } + end + + updated.each { |x| hash.delete(x) } + + # Now lets skeletonize all inventory_objects that were not saved + # TODO(lsmola) we should compare timestamps and ignore InventoryObjects or attributes that are old, to lower + # what we send as upsert in skeletal index + hash.keys.each do |db_index| + inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid) + end + end + def create_or_update_partial_records(all_attribute_keys) # We will create also remaining skeletal records @@ -435,8 +443,8 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni result = get_connection.execute( build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full) ) - # TODO(lsmola) we need to recognize what records were created and what records were updated. Will we take - # skeletal records as created? The creation should be having :complete => true + # TODO(lsmola) so for all of the lazy_find with key, we will need to fetch a fresh attributes from the + # db, otherwise we will be getting attribute that might not have been updated (add spec) inventory_collection.store_created_records(result) if inventory_collection.dependees.present? # We need to get primary keys of the created objects, but only if there are dependees that would use them @@ -446,6 +454,8 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni result, :on_conflict => on_conflict) end + + skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true) end # Stores primary_key values of created records into associated InventoryObject objects. diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index 237a18829a2..0a06fc0a02f 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -200,7 +200,7 @@ def build_update_query(all_attribute_keys, hashes) if inventory_collection.parallel_safe? 
update_query += %{ - RETURNING updated_values.#{quote_column_name("id")},#{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")} + RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")} } end From c57ba049c86061c3227f471495d637fbef3202ef Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 14:05:39 +0200 Subject: [PATCH 12/28] Building 2 partial InventoryObjects merges them according to timestamps Building 2 partial InventoryObjects merges them according to timestamps --- .../manager_refresh/inventory_collection.rb | 136 ++++++++++++++++++ .../index/type/skeletal.rb | 14 ++ .../manager_refresh/inventory_object.rb | 50 ++++++- .../save_collection/saver/base.rb | 73 ++-------- .../saver/concurrent_safe_batch.rb | 23 +-- 5 files changed, 227 insertions(+), 69 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection.rb b/app/models/manager_refresh/inventory_collection.rb index 233573c8571..5f97868843a 100644 --- a/app/models/manager_refresh/inventory_collection.rb +++ b/app/models/manager_refresh/inventory_collection.rb @@ -571,6 +571,142 @@ def parallel_safe? @parallel_safe_cache ||= %i(concurrent_safe concurrent_safe_batch).include?(saver_strategy) end + # @return [Boolean] true if the model_class supports STI + def supports_sti? + @supports_sti_cache = model_class.column_names.include?("type") if @supports_sti_cache.nil? + @supports_sti_cache + end + + # @return [Boolean] true if the model_class has created_on column + def supports_created_on? + if @supports_created_on_cache.nil? + @supports_created_on_cache = (model_class.column_names.include?("created_on") && ActiveRecord::Base.record_timestamps) + end + @supports_created_on_cache + end + + # @return [Boolean] true if the model_class has updated_on column + def supports_updated_on? + if @supports_updated_on_cache.nil? + @supports_updated_on_cache = (model_class.column_names.include?("updated_on") && ActiveRecord::Base.record_timestamps) + end + @supports_updated_on_cache + end + + # @return [Boolean] true if the model_class has created_at column + def supports_created_at? + if @supports_created_at_cache.nil? + @supports_created_at_cache = (model_class.column_names.include?("created_at") && ActiveRecord::Base.record_timestamps) + end + @supports_created_at_cache + end + + # @return [Boolean] true if the model_class has updated_at column + def supports_updated_at? + if @supports_updated_at_cache.nil? + @supports_updated_at_cache = (model_class.column_names.include?("updated_at") && ActiveRecord::Base.record_timestamps) + end + @supports_updated_at_cache + end + + # @return [Boolean] true if the model_class has timestamps_max column + def supports_timestamps_max? + @supports_timestamps_max_cache ||= model_class.column_names.include?("timestamps_max") + end + + # @return [Boolean] true if the model_class has timestamps column + def supports_timestamps? + @supports_timestamps_cache ||= model_class.column_names.include?("timestamps") + end + + # @return [Boolean] true if the model_class has timestamp column + def supports_timestamp? + @supports_timestamp_cache ||= model_class.column_names.include?("timestamp") + end + + # @return [Boolean] true if the model_class has timestamps_max column + def supports_resource_versions_max? 
+ @supports_resource_versions_max_cache ||= model_class.column_names.include?("resource_versions_max") + end + + # @return [Boolean] true if the model_class has timestamps column + def supports_resource_versions? + @supports_resource_versions_cache ||= model_class.column_names.include?("resource_versions") + end + + # @return [Boolean] true if the model_class has timestamp column + def supports_resource_version? + @supports_resource_version_cache ||= model_class.column_names.include?("resource_version") + end + + # @return [Array] all columns that are part of the best fit unique index + def unique_index_columns + return @unique_index_columns if @unique_index_columns + + @unique_index_columns = unique_index_for(unique_index_keys).columns.map(&:to_sym) + end + + def unique_index_keys + @unique_index_keys ||= manager_ref_to_cols.map(&:to_sym) + end + + # @return [Array] array of all unique indexes known to model + def unique_indexes + @unique_indexes_cache if @unique_indexes_cache + + @unique_indexes_cache = model_class.connection.indexes(model_class.table_name).select(&:unique) + + if @unique_indexes_cache.blank? + raise "#{inventory_collection} and its table #{model_class.table_name} must have a unique index defined, to"\ + " be able to use saver_strategy :concurrent_safe or :concurrent_safe_batch." + end + + @unique_indexes_cache + end + + # Finds an index that fits the list of columns (keys) the best + # + # @param keys [Array] + # @raise [Exception] if the unique index for the columns was not found + # @return [ActiveRecord::ConnectionAdapters::IndexDefinition] unique index fitting the keys + def unique_index_for(keys) + @unique_index_for_keys_cache ||= {} + @unique_index_for_keys_cache[keys] if @unique_index_for_keys_cache[keys] + + # Find all uniq indexes that that are covering our keys + uniq_key_candidates = unique_indexes.each_with_object([]) { |i, obj| obj << i if (keys - i.columns.map(&:to_sym)).empty? } + + if @unique_indexes_cache.blank? + raise "#{inventory_collection} and its table #{model_class.table_name} must have a unique index defined "\ + "covering columns #{keys} to be able to use saver_strategy :concurrent_safe or :concurrent_safe_batch." + end + + # Take the uniq key having the least number of columns + @unique_index_for_keys_cache[keys] = uniq_key_candidates.min_by { |x| x.columns.count } + end + + def internal_columns + return @internal_columns if @internal_columns + + @internal_columns = [] + @internal_columns << :type if supports_sti? + @internal_columns << :created_on if supports_created_on? + @internal_columns << :created_at if supports_created_at? + @internal_columns << :updated_on if supports_updated_on? + @internal_columns << :updated_at if supports_updated_at? + @internal_columns << :timestamps_max if supports_timestamps_max? + @internal_columns << :timestamps if supports_timestamps? + @internal_columns << :timestamp if supports_timestamp? + @internal_columns << :resource_versions_max if supports_resource_versions_max? + @internal_columns << :resource_versions if supports_resource_versions? + @internal_columns << :resource_version if supports_resource_version? + @internal_columns + end + + def base_columns + @base_columns ||= unique_index_columns + internal_columns + end + # @return [Boolean] true if no more data will be added to this InventoryCollection object, that usually happens # after the parsing step is finished def data_collection_finalized? 
diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index 316a4f6b08e..35e00936707 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -43,6 +43,8 @@ def delete(index_value) def skeletonize_primary_index(index_value) inventory_object = primary_index.delete(index_value) return unless inventory_object + fill_timestamps!(inventory_object.data) + index[index_value] = inventory_object end @@ -55,6 +57,7 @@ def skeletonize_primary_index(index_value) # assigned def build(attributes) attributes = {}.merge!(default_values).merge!(attributes) + fill_timestamps!(attributes) # If the primary index is already filled, we don't want populate skeletal index uuid = ::ManagerRefresh::InventoryCollection::Reference.build_stringified_reference(attributes, named_ref) @@ -76,6 +79,17 @@ def build(attributes) index[inventory_object.manager_uuid] = inventory_object end + def fill_timestamps!(attributes) + if inventory_collection.supports_timestamps_max? && attributes[:timestamp] + # We have to symbolize, since serializing persistor makes these strings + (attributes[:timestamps] ||= {}).symbolize_keys! + + (attributes.keys - inventory_collection.base_columns).each do |key| + attributes[:timestamps][key] ||= attributes[:timestamp] + end + end + end + private attr_reader :primary_index diff --git a/app/models/manager_refresh/inventory_object.rb b/app/models/manager_refresh/inventory_object.rb index 13234ece9be..4b3b9f7b84d 100644 --- a/app/models/manager_refresh/inventory_object.rb +++ b/app/models/manager_refresh/inventory_object.rb @@ -137,7 +137,55 @@ def attributes_with_keys(inventory_collection_scope = nil, all_attribute_keys = # @param attributes [Hash] attributes we want to assign # @return [ManagerRefresh::InventoryObject] self def assign_attributes(attributes) - attributes.each { |k, v| public_send("#{k}=", v) } + attributes.each do |k, v| + # We don't want timestamps or resource versions to be overwritten here, since those are driving the conditions + next if %i(timestamps timestamps_max timestamp).include?(k) + next if %i(resource_versions resource_versions_max resource_version).include?(k) + + if inventory_collection.supports_timestamps_max? && attributes[:timestamp] && data[:timestamp] + # If timestamps are in play, we will set only attributes that are newer + specific_attr_timestamp = attributes[:timestamps].try(:[], k) + specific_data_timestamp = data[:timestamps].try(:[], k) + + assign = if !specific_attr_timestamp + # Data have no timestamp, we will ignore the check + true + elsif specific_attr_timestamp && !specific_data_timestamp + # Data specific timestamp is nil and we have new specific timestamp + if data.key?(k) + if attributes[:timestamp] >= data[:timestamp] + # We can save if the full timestamp is bigger, if the data already contains the attribute + true + end + else + # Data do not contain the attribute, so we are saving the newest + true + end + true + elsif specific_attr_timestamp > specific_data_timestamp + # both partial timestamps are there, newer must be bigger + true + end + + if assign + public_send("#{k}=", v) # Attribute is newer than current one, lets use it + data[:timestamps][k] = specific_attr_timestamp if specific_attr_timestamp # and set the latest timestamp + end + else + public_send("#{k}=", v) + end + end + + if inventory_collection.supports_timestamps_max? 
&& attributes[:timestamp] && data[:timestamp] + # If timestamps are present, store the bigger one + data[:timestamp] = attributes[:timestamp] if attributes[:timestamp] > data[:timestamp] + end + + if inventory_collection.supports_resource_versions_max? && attributes[:timestamp] && data[:timestamp] + # If timestamps are present, store the bigger one + data[:timestamp] = attributes[:timestamp] if attributes[:timestamp] > data[:timestamp] + end + self end diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index 2efdd0df5f6..3ed27891ccf 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -16,7 +16,7 @@ def initialize(inventory_collection) @table_name = @model_class.table_name @primary_key = @model_class.primary_key @arel_primary_key = @model_class.arel_attribute(@primary_key) - @unique_index_keys = inventory_collection.manager_ref_to_cols.map(&:to_sym) + @unique_index_keys = inventory_collection.unique_index_keys @unique_index_keys_to_s = inventory_collection.manager_ref_to_cols.map(&:to_s) @select_keys = [@primary_key] + @unique_index_keys_to_s @unique_db_primary_keys = Set.new @@ -91,6 +91,8 @@ def save_inventory_collection! # @return [Hash] modified hash from parameter attributes with casted values def values_for_database!(all_attribute_keys, attributes) all_attribute_keys.each do |key| + next unless attributes.key?(key) + if (type = serializable_keys[key]) attributes[key] = type.serialize(attributes[key]) end @@ -102,7 +104,7 @@ def transform_to_hash!(all_attribute_keys, hash) if inventory_collection.use_ar_object? record = inventory_collection.model_class.new(hash) values_for_database!(all_attribute_keys, - record.attributes.symbolize_keys) + record.attributes.slice(*record.changed_attributes.keys).symbolize_keys) elsif serializable_keys? values_for_database!(all_attribute_keys, hash) @@ -305,29 +307,7 @@ def assign_attributes_for_create!(hash, create_time) end def internal_columns - return @internal_columns if @internal_columns - - @internal_columns = [] - @internal_columns << :type if supports_sti? - @internal_columns << :created_on if supports_created_on? - @internal_columns << :created_at if supports_created_at? - @internal_columns << :updated_on if supports_updated_on? - @internal_columns << :updated_at if supports_updated_at? - @internal_columns - end - - # @return [Array] array of all unique indexes known to model - def unique_indexes - @unique_indexes_cache if @unique_indexes_cache - - @unique_indexes_cache = model_class.connection.indexes(model_class.table_name).select(&:unique) - - if @unique_indexes_cache.blank? - raise "#{inventory_collection} and its table #{model_class.table_name} must have a unique index defined, to"\ - " be able to use saver_strategy :concurrent_safe or :concurrent_safe_batch." 
- end - - @unique_indexes_cache + @internal_columns ||= inventory_collection.internal_columns end # Finds an index that fits the list of columns (keys) the best @@ -336,26 +316,12 @@ def unique_indexes # @raise [Exception] if the unique index for the columns was not found # @return [ActiveRecord::ConnectionAdapters::IndexDefinition] unique index fitting the keys def unique_index_for(keys) - @unique_index_for_keys_cache ||= {} - @unique_index_for_keys_cache[keys] if @unique_index_for_keys_cache[keys] - - # Find all uniq indexes that that are covering our keys - uniq_key_candidates = unique_indexes.each_with_object([]) { |i, obj| obj << i if (keys - i.columns.map(&:to_sym)).empty? } - - if @unique_indexes_cache.blank? - raise "#{inventory_collection} and its table #{model_class.table_name} must have a unique index defined "\ - "covering columns #{keys} to be able to use saver_strategy :concurrent_safe or :concurrent_safe_batch." - end - - # Take the uniq key having the least number of columns - @unique_index_for_keys_cache[keys] = uniq_key_candidates.min_by { |x| x.columns.count } + inventory_collection.unique_index_for(keys) end # @return [Array] all columns that are part of the best fit unique index def unique_index_columns - return @unique_index_columns if @unique_index_columns - - @unique_index_columns = unique_index_for(unique_index_keys).columns.map(&:to_sym) + @unique_index_columns ||= inventory_collection.unique_index_columns end # @return [Array] all columns that are part of the best fit unique index @@ -367,40 +333,27 @@ def unique_index_columns_to_s # @return [Boolean] true if the model_class supports STI def supports_sti? - @supports_sti_cache = model_class.column_names.include?("type") if @supports_sti_cache.nil? - @supports_sti_cache + @supports_sti_cache ||= inventory_collection.supports_sti? end # @return [Boolean] true if the model_class has created_on column def supports_created_on? - if @supports_created_on_cache.nil? - @supports_created_on_cache = (model_class.column_names.include?("created_on") && ActiveRecord::Base.record_timestamps) - end - @supports_created_on_cache + @supports_created_on_cache ||= inventory_collection.supports_created_on? end # @return [Boolean] true if the model_class has updated_on column def supports_updated_on? - if @supports_updated_on_cache.nil? - @supports_updated_on_cache = (model_class.column_names.include?("updated_on") && ActiveRecord::Base.record_timestamps) - end - @supports_updated_on_cache + @supports_updated_on_cache ||= inventory_collection.supports_updated_on? end # @return [Boolean] true if the model_class has created_at column def supports_created_at? - if @supports_created_at_cache.nil? - @supports_created_at_cache = (model_class.column_names.include?("created_at") && ActiveRecord::Base.record_timestamps) - end - @supports_created_at_cache + @supports_created_at_cache ||= inventory_collection.supports_created_at? end # @return [Boolean] true if the model_class has updated_at column def supports_updated_at? - if @supports_updated_at_cache.nil? - @supports_updated_at_cache = (model_class.column_names.include?("updated_at") && ActiveRecord::Base.record_timestamps) - end - @supports_updated_at_cache + @supports_updated_at_cache ||= inventory_collection.supports_updated_at? end # @return [Boolean] true if any serializable keys are present @@ -415,7 +368,7 @@ def supports_remote_data_timestamp?(all_attribute_keys) # @return [Boolean] true if the model_class has remote_data_timestamp column def supports_timestamps_max? 
- @supports_timestamps_max_cache ||= model_class.column_names.include?("timestamps_max") + @supports_timestamps_max_cache ||= inventory_collection.supports_timestamps_max? end end end diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 99263b0ff29..c32bd6e86c9 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -315,7 +315,6 @@ def skeletonize_ignored_records!(hash, result, all_unique_columns: false) end end - def create_or_update_partial_records(all_attribute_keys) # We will create also remaining skeletal records skeletal_attributes_index = {} @@ -329,9 +328,7 @@ def create_or_update_partial_records(all_attribute_keys) skeletal_inventory_objects_index[index] = inventory_object end - - base_columns = unique_index_columns + internal_columns + [:timestamps_max] - columns_for_per_column_batches = all_attribute_keys - base_columns + columns_for_per_column_batches = all_attribute_keys - inventory_collection.base_columns if supports_timestamps_max? all_attribute_keys << :timestamps @@ -347,7 +344,13 @@ def create_or_update_partial_records(all_attribute_keys) # Partial create or update must never set a timestamp for the whole row if supports_timestamps_max? hash[:timestamps_max] = hash.delete(:timestamp) - hash[:timestamps] = columns_for_per_column_batches.map { |x| [x, hash[:timestamps_max]] if hash.key?(x) }.compact.to_h + + timestamps = {} + if hash[:timestamps].present? + # Lets clean to only what we save, since when we build the skeletal object, we can set more + hash[:timestamps] = hash[:timestamps].slice(*all_attribute_keys) + timestamps = hash[:timestamps] + end end # Transform hash to DB format hash = transform_to_hash!(all_attribute_keys, hash) @@ -356,6 +359,7 @@ def create_or_update_partial_records(all_attribute_keys) next unless assert_referential_integrity(hash) + hash[:__non_serialized_timestamps] = timestamps # store non serialized timestamps for the partial updates hashes << hash # Index on Unique Columns values, so we can easily fill in the :id later indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object @@ -372,12 +376,13 @@ def create_or_update_partial_records(all_attribute_keys) # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing (columns_for_per_column_batches - [:timestamp]).each do |column_name| - # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the key? is not correct - # and we don't want to do the map_ids filtered = hashes.select { |x| x.key?(column_name) } filtered.each_slice(batch_size_for_persisting) do |batch| - create_partial!(base_columns + [column_name], + # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps + batch.each { |x| x[:timestamps_max] = x[:__non_serialized_timestamps][column_name] if x[:__non_serialized_timestamps][column_name] } + + create_partial!(inventory_collection.base_columns + [column_name], batch, :on_conflict => :do_update, :column_name => column_name) @@ -387,6 +392,8 @@ def create_or_update_partial_records(all_attribute_keys) # TODO(lsmola) we need to recognize what records were created and what records were updated. Will we take # skeletal records as created? 
The creation should be having :complete => true # inventory_collection.store_created_records(result) + # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the lazy_find with key will not + # be correct if inventory_collection.dependees.present? # We need to get primary keys of the created objects, but only if there are dependees that would use them map_ids_to_inventory_objects(indexed_inventory_objects, From bf74c7d3035e2cb2d92f83243a34a69613d81137 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 14:35:15 +0200 Subject: [PATCH 13/28] Fix assign attributes being too strict for the full rows Fix assign attributes being too strict for the full rows --- .../manager_refresh/inventory_object.rb | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/app/models/manager_refresh/inventory_object.rb b/app/models/manager_refresh/inventory_object.rb index 4b3b9f7b84d..ed38ead26b7 100644 --- a/app/models/manager_refresh/inventory_object.rb +++ b/app/models/manager_refresh/inventory_object.rb @@ -169,21 +169,30 @@ def assign_attributes(attributes) if assign public_send("#{k}=", v) # Attribute is newer than current one, lets use it - data[:timestamps][k] = specific_attr_timestamp if specific_attr_timestamp # and set the latest timestamp + (data[:timestamps] ||= {})[k] = specific_attr_timestamp if specific_attr_timestamp # and set the latest timestamp end else public_send("#{k}=", v) end end - if inventory_collection.supports_timestamps_max? && attributes[:timestamp] && data[:timestamp] - # If timestamps are present, store the bigger one - data[:timestamp] = attributes[:timestamp] if attributes[:timestamp] > data[:timestamp] + if inventory_collection.supports_timestamps_max? + if attributes[:timestamp] && data[:timestamp] + # If both timestamps are present, store the bigger one + data[:timestamp] = attributes[:timestamp] if attributes[:timestamp] > data[:timestamp] + elsif attributes[:timestamp] && !data[:timestamp] + # We are assigning timestamp that was missing + data[:timestamp] = attributes[:timestamp] + end end - if inventory_collection.supports_resource_versions_max? && attributes[:timestamp] && data[:timestamp] - # If timestamps are present, store the bigger one - data[:timestamp] = attributes[:timestamp] if attributes[:timestamp] > data[:timestamp] + if inventory_collection.supports_resource_versions_max? 
+ if attributes[:resource_version] && data[:resource_version] + # If timestamps are present, store the bigger one + data[:resource_version] = attributes[:resource_version] if attributes[:resource_version] > data[:resource_version] + elsif attributes[:resource_version] && !data[:resource_version] + data[:resource_version] = attributes[:resource_version] + end end self From dfdd349b6b6ac21e3a29fd9bfa7b213525af13b2 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 16:10:37 +0200 Subject: [PATCH 14/28] Don't do partial updates if timestamp is missing Don't do partial updates if timestamp is missing --- .../save_collection/saver/concurrent_safe_batch.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index c32bd6e86c9..ef68319abfb 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -328,8 +328,6 @@ def create_or_update_partial_records(all_attribute_keys) skeletal_inventory_objects_index[index] = inventory_object end - columns_for_per_column_batches = all_attribute_keys - inventory_collection.base_columns - if supports_timestamps_max? all_attribute_keys << :timestamps all_attribute_keys << :timestamps_max @@ -374,9 +372,14 @@ def create_or_update_partial_records(all_attribute_keys) :on_conflict => :do_nothing) end + # TODO(lsmola) for the ones without timestamp or resource_version, we should just update everything? Not for the + # skeletal precreate though, there we want just do_nothing? + # We need only skeletal records with timestamp + pre_filtered = hashes.select { |x| x[:timestamps_max] } + # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing - (columns_for_per_column_batches - [:timestamp]).each do |column_name| - filtered = hashes.select { |x| x.key?(column_name) } + (all_attribute_keys - inventory_collection.base_columns).each do |column_name| + filtered = pre_filtered.select { |x| x.key?(column_name) } filtered.each_slice(batch_size_for_persisting) do |batch| # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps From 810adeb68e5e0755e699bcfe64e4f3534b8ea4e5 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 17:03:52 +0200 Subject: [PATCH 15/28] Check timestamp before updating Check timestamp before updating, to avoid extra query --- .../saver/concurrent_safe_batch.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index ef68319abfb..0da8a300c5a 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -167,6 +167,21 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, next unless assert_referential_integrity(hash) inventory_object.id = primary_key_value + if inventory_collection.parallel_safe? 
&& supports_remote_data_timestamp?(all_attribute_keys) + record_timestamp = record.try(:timestamp) || record.try(:[], :timestamp) + record_timestamps_max = record.try(:timestamps_max) || record.try(:[], :timestamps_max) + hash_timestamp = hash[:timestamp] + + # Skip updating this record, because it is old + next if record_timestamp && hash_timestamp && record_timestamp >= hash_timestamp + + # Some column has bigger timestamp than the whole row, we need to store the row partially + if record_timestamps_max && hash_timestamp && record_timestamps_max > hash_timestamp + inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid) + next + end + end + hash_for_update = if inventory_collection.use_ar_object? record.assign_attributes(hash.except(:id, :type)) values_for_database!(all_attribute_keys, From a7111b95def699fa9e58717e6cb7aa44b8529065 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 17:05:51 +0200 Subject: [PATCH 16/28] Update TODO about timestamp Update TODO about timestamp, we wil lrequire timestamp for the partial update --- .../save_collection/saver/concurrent_safe_batch.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 0da8a300c5a..0af04987f19 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -387,9 +387,8 @@ def create_or_update_partial_records(all_attribute_keys) :on_conflict => :do_nothing) end - # TODO(lsmola) for the ones without timestamp or resource_version, we should just update everything? Not for the - # skeletal precreate though, there we want just do_nothing? - # We need only skeletal records with timestamp + # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal + # precreate would be updating records with default values, that are not correct. pre_filtered = hashes.select { |x| x[:timestamps_max] } # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing From 4add89b8aac9def7e105fb7b6290a78a8c93497a Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 18:55:15 +0200 Subject: [PATCH 17/28] Proper parallel_safe? conditions, so we don't break existing code Proper parallel_safe? conditions, so we don't break existing code --- .../save_collection/saver/concurrent_safe_batch.rb | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 0af04987f19..5d7387f9835 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -298,6 +298,12 @@ def destroy_records!(records) # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row def update_records!(all_attribute_keys, hashes, indexed_inventory_objects) return if hashes.blank? + + unless inventory_collection.parallel_safe? 
+ # We need to update the stored records before we save it, since hashes are modified + inventory_collection.store_updated_records(hashes) + end + query = build_update_query(all_attribute_keys, hashes) result = get_connection.execute(query) @@ -306,8 +312,6 @@ def update_records!(all_attribute_keys, hashes, indexed_inventory_objects) inventory_collection.store_updated_records(result) skeletonize_ignored_records!(indexed_inventory_objects, result) - else - inventory_collection.store_updated_records(hashes) end result @@ -479,7 +483,9 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni :on_conflict => on_conflict) end - skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true) + if inventory_collection.parallel_safe? + skeletonize_ignored_records!(indexed_inventory_objects, result, :all_unique_columns => true) + end end # Stores primary_key values of created records into associated InventoryObject objects. From 01e2a342fae6a9fbe1d1c3a1b6ed614333183c25 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Fri, 10 Aug 2018 19:50:29 +0200 Subject: [PATCH 18/28] Rename timestamp to resource_timestamp Rename timestamp to resource_timestamp, and following that renaming to resource_timestamps and resource_timestamps_max --- .../manager_refresh/inventory_collection.rb | 30 +++++++------- .../index/type/skeletal.rb | 6 +-- .../manager_refresh/inventory_object.rb | 22 +++++------ .../save_collection/saver/base.rb | 6 +-- .../saver/concurrent_safe_batch.rb | 28 ++++++------- .../save_collection/saver/sql_helper.rb | 39 ++++++++----------- 6 files changed, 63 insertions(+), 68 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection.rb b/app/models/manager_refresh/inventory_collection.rb index 5f97868843a..659a7442bb4 100644 --- a/app/models/manager_refresh/inventory_collection.rb +++ b/app/models/manager_refresh/inventory_collection.rb @@ -609,32 +609,32 @@ def supports_updated_at? @supports_updated_at_cache end - # @return [Boolean] true if the model_class has timestamps_max column - def supports_timestamps_max? - @supports_timestamps_max_cache ||= model_class.column_names.include?("timestamps_max") + # @return [Boolean] true if the model_class has resource_timestamps_max column + def supports_resource_timestamps_max? + @supports_resource_timestamps_max_cache ||= model_class.column_names.include?("resource_timestamps_max") end - # @return [Boolean] true if the model_class has timestamps column - def supports_timestamps? - @supports_timestamps_cache ||= model_class.column_names.include?("timestamps") + # @return [Boolean] true if the model_class has resource_timestamps column + def supports_resource_timestamps? + @supports_resource_timestamps_cache ||= model_class.column_names.include?("resource_timestamps") end - # @return [Boolean] true if the model_class has timestamp column - def supports_timestamp? - @supports_timestamp_cache ||= model_class.column_names.include?("timestamp") + # @return [Boolean] true if the model_class has resource_timestamp column + def supports_resource_timestamp? + @supports_resource_timestamp_cache ||= model_class.column_names.include?("resource_timestamp") end - # @return [Boolean] true if the model_class has timestamps_max column + # @return [Boolean] true if the model_class has resource_versions_max column def supports_resource_versions_max? 
@supports_resource_versions_max_cache ||= model_class.column_names.include?("resource_versions_max") end - # @return [Boolean] true if the model_class has timestamps column + # @return [Boolean] true if the model_class has resource_versions column def supports_resource_versions? @supports_resource_versions_cache ||= model_class.column_names.include?("resource_versions") end - # @return [Boolean] true if the model_class has timestamp column + # @return [Boolean] true if the model_class has resource_version column def supports_resource_version? @supports_resource_version_cache ||= model_class.column_names.include?("resource_version") end @@ -694,9 +694,9 @@ def internal_columns @internal_columns << :created_at if supports_created_at? @internal_columns << :updated_on if supports_updated_on? @internal_columns << :updated_at if supports_updated_at? - @internal_columns << :timestamps_max if supports_timestamps_max? - @internal_columns << :timestamps if supports_timestamps? - @internal_columns << :timestamp if supports_timestamp? + @internal_columns << :resource_timestamps_max if supports_resource_timestamps_max? + @internal_columns << :resource_timestamps if supports_resource_timestamps? + @internal_columns << :resource_timestamp if supports_resource_timestamp? @internal_columns << :resource_versions_max if supports_resource_versions_max? @internal_columns << :resource_versions if supports_resource_versions? @internal_columns << :resource_version if supports_resource_version? diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index 35e00936707..8c5aff437fe 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -80,12 +80,12 @@ def build(attributes) end def fill_timestamps!(attributes) - if inventory_collection.supports_timestamps_max? && attributes[:timestamp] + if inventory_collection.supports_resource_timestamps_max? && attributes[:resource_timestamp] # We have to symbolize, since serializing persistor makes these strings - (attributes[:timestamps] ||= {}).symbolize_keys! + (attributes[:resource_timestamps] ||= {}).symbolize_keys! (attributes.keys - inventory_collection.base_columns).each do |key| - attributes[:timestamps][key] ||= attributes[:timestamp] + attributes[:resource_timestamps][key] ||= attributes[:resource_timestamp] end end end diff --git a/app/models/manager_refresh/inventory_object.rb b/app/models/manager_refresh/inventory_object.rb index ed38ead26b7..96a8cdbfe61 100644 --- a/app/models/manager_refresh/inventory_object.rb +++ b/app/models/manager_refresh/inventory_object.rb @@ -139,13 +139,13 @@ def attributes_with_keys(inventory_collection_scope = nil, all_attribute_keys = def assign_attributes(attributes) attributes.each do |k, v| # We don't want timestamps or resource versions to be overwritten here, since those are driving the conditions - next if %i(timestamps timestamps_max timestamp).include?(k) + next if %i(resource_timestamps resource_timestamps_max resource_timestamp).include?(k) next if %i(resource_versions resource_versions_max resource_version).include?(k) - if inventory_collection.supports_timestamps_max? && attributes[:timestamp] && data[:timestamp] + if inventory_collection.supports_resource_timestamps_max? 
&& attributes[:resource_timestamp] && data[:resource_timestamp] # If timestamps are in play, we will set only attributes that are newer - specific_attr_timestamp = attributes[:timestamps].try(:[], k) - specific_data_timestamp = data[:timestamps].try(:[], k) + specific_attr_timestamp = attributes[:resource_timestamps].try(:[], k) + specific_data_timestamp = data[:resource_timestamps].try(:[], k) assign = if !specific_attr_timestamp # Data have no timestamp, we will ignore the check @@ -153,7 +153,7 @@ def assign_attributes(attributes) elsif specific_attr_timestamp && !specific_data_timestamp # Data specific timestamp is nil and we have new specific timestamp if data.key?(k) - if attributes[:timestamp] >= data[:timestamp] + if attributes[:resource_timestamp] >= data[:resource_timestamp] # We can save if the full timestamp is bigger, if the data already contains the attribute true end @@ -169,20 +169,20 @@ def assign_attributes(attributes) if assign public_send("#{k}=", v) # Attribute is newer than current one, lets use it - (data[:timestamps] ||= {})[k] = specific_attr_timestamp if specific_attr_timestamp # and set the latest timestamp + (data[:resource_timestamps] ||= {})[k] = specific_attr_timestamp if specific_attr_timestamp # and set the latest timestamp end else public_send("#{k}=", v) end end - if inventory_collection.supports_timestamps_max? - if attributes[:timestamp] && data[:timestamp] + if inventory_collection.supports_resource_timestamps_max? + if attributes[:resource_timestamp] && data[:resource_timestamp] # If both timestamps are present, store the bigger one - data[:timestamp] = attributes[:timestamp] if attributes[:timestamp] > data[:timestamp] - elsif attributes[:timestamp] && !data[:timestamp] + data[:resource_timestamp] = attributes[:resource_timestamp] if attributes[:resource_timestamp] > data[:resource_timestamp] + elsif attributes[:resource_timestamp] && !data[:resource_timestamp] # We are assigning timestamp that was missing - data[:timestamp] = attributes[:timestamp] + data[:resource_timestamp] = attributes[:resource_timestamp] end end diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index 3ed27891ccf..1c48b416730 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -363,12 +363,12 @@ def serializable_keys? # @return [Boolean] true if the model_class has timestamp column def supports_remote_data_timestamp?(all_attribute_keys) - all_attribute_keys.include?(:timestamp) # include? on Set is O(1) + all_attribute_keys.include?(:resource_timestamp) # include? on Set is O(1) end # @return [Boolean] true if the model_class has remote_data_timestamp column - def supports_timestamps_max? - @supports_timestamps_max_cache ||= inventory_collection.supports_timestamps_max? + def supports_resource_timestamps_max? + @supports_resource_timestamps_max_cache ||= inventory_collection.supports_resource_timestamps_max? 
end end end diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 5d7387f9835..a5db0f8c896 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -168,9 +168,9 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, inventory_object.id = primary_key_value if inventory_collection.parallel_safe? && supports_remote_data_timestamp?(all_attribute_keys) - record_timestamp = record.try(:timestamp) || record.try(:[], :timestamp) - record_timestamps_max = record.try(:timestamps_max) || record.try(:[], :timestamps_max) - hash_timestamp = hash[:timestamp] + record_timestamp = record.try(:resource_timestamp) || record.try(:[], :resource_timestamp) + record_timestamps_max = record.try(:resource_timestamps_max) || record.try(:[], :resource_timestamps_max) + hash_timestamp = hash[:resource_timestamp] # Skip updating this record, because it is old next if record_timestamp && hash_timestamp && record_timestamp >= hash_timestamp @@ -347,9 +347,9 @@ def create_or_update_partial_records(all_attribute_keys) skeletal_inventory_objects_index[index] = inventory_object end - if supports_timestamps_max? - all_attribute_keys << :timestamps - all_attribute_keys << :timestamps_max + if supports_resource_timestamps_max? + all_attribute_keys << :resource_timestamps + all_attribute_keys << :resource_timestamps_max end indexed_inventory_objects = {} @@ -359,14 +359,14 @@ def create_or_update_partial_records(all_attribute_keys) skeletal_inventory_objects_index.each do |index, inventory_object| hash = skeletal_attributes_index.delete(index) # Partial create or update must never set a timestamp for the whole row - if supports_timestamps_max? - hash[:timestamps_max] = hash.delete(:timestamp) + if supports_resource_timestamps_max? + hash[:resource_timestamps_max] = hash.delete(:resource_timestamp) timestamps = {} - if hash[:timestamps].present? + if hash[:resource_timestamps].present? # Lets clean to only what we save, since when we build the skeletal object, we can set more - hash[:timestamps] = hash[:timestamps].slice(*all_attribute_keys) - timestamps = hash[:timestamps] + hash[:resource_timestamps] = hash[:resource_timestamps].slice(*all_attribute_keys) + timestamps = hash[:resource_timestamps] end end # Transform hash to DB format @@ -376,7 +376,7 @@ def create_or_update_partial_records(all_attribute_keys) next unless assert_referential_integrity(hash) - hash[:__non_serialized_timestamps] = timestamps # store non serialized timestamps for the partial updates + hash[:__non_serialized_resource_timestampa] = timestamps # store non serialized timestamps for the partial updates hashes << hash # Index on Unique Columns values, so we can easily fill in the :id later indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object @@ -393,7 +393,7 @@ def create_or_update_partial_records(all_attribute_keys) # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal # precreate would be updating records with default values, that are not correct. 
- pre_filtered = hashes.select { |x| x[:timestamps_max] } + pre_filtered = hashes.select { |x| x[:resource_timestamps_max] } # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing (all_attribute_keys - inventory_collection.base_columns).each do |column_name| @@ -401,7 +401,7 @@ def create_or_update_partial_records(all_attribute_keys) filtered.each_slice(batch_size_for_persisting) do |batch| # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps - batch.each { |x| x[:timestamps_max] = x[:__non_serialized_timestamps][column_name] if x[:__non_serialized_timestamps][column_name] } + batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_resource_timestampa][column_name] if x[:__non_serialized_resource_timestampa][column_name] } create_partial!(inventory_collection.base_columns + [column_name], batch, diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index 0a06fc0a02f..6367b5b1958 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -25,7 +25,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu connection = get_connection ignore_cols = if mode == :partial - [:timestamp] + [:resource_timestamp] elsif mode == :full [] end @@ -59,7 +59,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu } ignore_cols += if mode == :partial - [:timestamps, :timestamps_max] + [:resource_timestamps, :resource_timestamps_max] elsif mode == :full [] end @@ -79,11 +79,11 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu } if supports_remote_data_timestamp?(all_attribute_keys) insert_query += %{ - , timestamps = '{}', timestamps_max = NULL + , resource_timestamps = '{}', resource_timestamps_max = NULL - WHERE EXCLUDED.timestamp IS NULL OR ( - (#{table_name}.timestamp IS NULL OR EXCLUDED.timestamp > #{table_name}.timestamp) AND - (#{table_name}.timestamps_max IS NULL OR EXCLUDED.timestamp >= #{table_name}.timestamps_max) + WHERE EXCLUDED.resource_timestamp IS NULL OR ( + (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamp > #{table_name}.resource_timestamp) AND + (#{table_name}.resource_timestamps_max IS NULL OR EXCLUDED.resource_timestamp >= #{table_name}.resource_timestamps_max) ) } end @@ -93,19 +93,14 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu insert_query += %{ SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} } - if supports_timestamps_max? - # TODO(lsmola) we should have EXCLUDED.timestamp > #{table_name}.timestamp, but if skeletal precreate - # creates the row, it sets timestamp. Should we combine it with complete => true only? We probably need - # to set the timestamp, otherwise we can't touch it in the update clause. Maybe we coud set it as - # timestamps_max? - + if supports_resource_timestamps_max? 
insert_query += %{ - , timestamps = #{table_name}.timestamps || ('{"#{column_name}": "' || EXCLUDED.timestamps_max::timestamp || '"}')::jsonb - , timestamps_max = greatest(#{table_name}.timestamps_max::timestamp, EXCLUDED.timestamps_max::timestamp) - WHERE EXCLUDED.timestamps_max IS NULL OR ( - (#{table_name}.timestamp IS NULL OR EXCLUDED.timestamps_max > #{table_name}.timestamp) AND ( - (#{table_name}.timestamps->>'#{column_name}')::timestamp IS NULL OR - EXCLUDED.timestamps_max::timestamp > (#{table_name}.timestamps->>'#{column_name}')::timestamp + , resource_timestamps = #{table_name}.resource_timestamps || ('{"#{column_name}": "' || EXCLUDED.resource_timestamps_max::timestamp || '"}')::jsonb + , resource_timestamps_max = greatest(#{table_name}.resource_timestamps_max::timestamp, EXCLUDED.resource_timestamps_max::timestamp) + WHERE EXCLUDED.resource_timestamps_max IS NULL OR ( + (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamps_max > #{table_name}.resource_timestamp) AND ( + (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp IS NULL OR + EXCLUDED.resource_timestamps_max::timestamp > (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp ) ) } @@ -169,7 +164,7 @@ def build_update_query(all_attribute_keys, hashes) if supports_remote_data_timestamp?(all_attribute_keys) # Full row update will reset the partial update timestamps update_query += %{ - , timestamps = '{}', timestamps_max = NULL + , resource_timestamps = '{}', resource_timestamps_max = NULL } end @@ -190,9 +185,9 @@ def build_update_query(all_attribute_keys, hashes) if supports_remote_data_timestamp?(all_attribute_keys) update_query += %{ AND ( - updated_values.timestamp IS NULL OR ( - (#{table_name}.timestamp IS NULL OR updated_values.timestamp > #{table_name}.timestamp) AND - (#{table_name}.timestamps_max IS NULL OR updated_values.timestamp >= #{table_name}.timestamps_max) + updated_values.resource_timestamp IS NULL OR ( + (#{table_name}.resource_timestamp IS NULL OR updated_values.resource_timestamp > #{table_name}.resource_timestamp) AND + (#{table_name}.resource_timestamps_max IS NULL OR updated_values.resource_timestamp >= #{table_name}.resource_timestamps_max) ) ) } From 08534ab9d4c428023a4de8a280a41ad06063165a Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Wed, 31 Jan 2018 16:22:59 +0100 Subject: [PATCH 19/28] Allow to update STI :type Sometimes we have base STI and we are defining sub STIs in parser. In this case the skeletal precreate will create the record with base STI, so we need to be able to update it later, by full refresh of the target, having the right STI. --- app/models/manager_refresh/save_collection/saver/base.rb | 2 +- .../manager_refresh/save_collection/saver/concurrent_safe.rb | 3 ++- .../save_collection/saver/concurrent_safe_batch.rb | 4 ++-- app/models/manager_refresh/save_collection/saver/default.rb | 2 +- .../manager_refresh/save_collection/saver/sql_helper.rb | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index 1c48b416730..efb2a898750 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -291,6 +291,7 @@ def time_now # @param hash [Hash] data hash # @param update_time [Time] data hash def assign_attributes_for_update!(hash, update_time) + hash[:type] = model_class.name if supports_sti? && hash[:type].nil? 
hash[:updated_on] = update_time if supports_updated_on? hash[:updated_at] = update_time if supports_updated_at? end @@ -300,7 +301,6 @@ def assign_attributes_for_update!(hash, update_time) # @param hash [Hash] data hash # @param create_time [Time] data hash def assign_attributes_for_create!(hash, create_time) - hash[:type] = model_class.name if supports_sti? && hash[:type].nil? hash[:created_on] = create_time if supports_created_on? hash[:created_at] = create_time if supports_created_at? assign_attributes_for_update!(hash, create_time) diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe.rb index 4c6c0bf832d..c6db87142e3 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe.rb @@ -13,7 +13,7 @@ class ConcurrentSafe < ManagerRefresh::SaveCollection::Saver::Base # key value def update_record!(record, hash, inventory_object) assign_attributes_for_update!(hash, time_now) - record.assign_attributes(hash.except(:id, :type)) + record.assign_attributes(hash.except(:id)) if !inventory_object.inventory_collection.check_changed? || record.changed? update_query = inventory_object.inventory_collection.model_class.where(:id => record.id) @@ -39,6 +39,7 @@ def create_record!(hash, inventory_object) data = inventory_collection.model_class.new(hash).attributes.symbolize_keys # TODO(lsmola) abstract common behavior into base class + all_attribute_keys << :type if supports_sti? all_attribute_keys << :created_at if supports_created_at? all_attribute_keys << :updated_at if supports_updated_at? all_attribute_keys << :created_on if supports_created_on? diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index a5db0f8c896..f3389bc744d 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -92,6 +92,7 @@ def save!(association) all_attribute_keys << :updated_at if supports_updated_at? all_attribute_keys << :created_on if supports_created_on? all_attribute_keys << :updated_on if supports_updated_on? + all_attribute_keys << :type if supports_sti? _log.debug("Processing #{inventory_collection} of size #{inventory_collection.size}...") @@ -103,7 +104,6 @@ def save!(association) inventory_collection.custom_reconnect_block&.call(inventory_collection, inventory_objects_index, attributes_index) end - all_attribute_keys << :type if supports_sti? # Records that were not found in the DB but sent for saving, we will be creating these in the DB. if inventory_collection.create_allowed? on_conflict = inventory_collection.parallel_safe? ? :do_update : nil @@ -183,7 +183,7 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, end hash_for_update = if inventory_collection.use_ar_object? - record.assign_attributes(hash.except(:id, :type)) + record.assign_attributes(hash.except(:id)) values_for_database!(all_attribute_keys, record.attributes.symbolize_keys) elsif serializable_keys? 
diff --git a/app/models/manager_refresh/save_collection/saver/default.rb b/app/models/manager_refresh/save_collection/saver/default.rb index cd414954855..19de0bd5468 100644 --- a/app/models/manager_refresh/save_collection/saver/default.rb +++ b/app/models/manager_refresh/save_collection/saver/default.rb @@ -10,7 +10,7 @@ class Default < ManagerRefresh::SaveCollection::Saver::Base # @param inventory_object [ManagerRefresh::InventoryObject] InventoryObject instance where we will store primary # key value def update_record!(record, hash, inventory_object) - record.assign_attributes(hash.except(:id, :type)) + record.assign_attributes(hash.except(:id)) if !inventory_collection.check_changed? || record.changed? record.save inventory_collection.store_updated_records(record) diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index 6367b5b1958..ed85a5e0c62 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -147,8 +147,8 @@ def build_update_query(all_attribute_keys, hashes) # Cache the connection for the batch connection = get_connection - # We want to ignore type and create timestamps when updating - all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i(type created_at created_on).include?(x) } + # We want to ignore create timestamps when updating + all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i(created_at created_on).include?(x) } all_attribute_keys_array << :id values = hashes.map! do |hash| From 8c3d957459bd9d54d001b47675a8e8e1de5d6162 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 14 Aug 2018 17:52:42 +0200 Subject: [PATCH 20/28] resource_version should work the same as resource_timestamp resource_version should work the same as resource_timestamp, while one is integer and other is timestamp --- .../index/type/skeletal.rb | 24 +++-- .../manager_refresh/inventory_object.rb | 95 ++++++++++--------- .../save_collection/saver/base.rb | 14 ++- .../saver/concurrent_safe_batch.rb | 90 ++++++++++++------ .../save_collection/saver/sql_helper.rb | 95 ++++++++++++++----- 5 files changed, 210 insertions(+), 108 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index 8c5aff437fe..7261ddd1d5d 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -43,7 +43,7 @@ def delete(index_value) def skeletonize_primary_index(index_value) inventory_object = primary_index.delete(index_value) return unless inventory_object - fill_timestamps!(inventory_object.data) + fill_versions!(inventory_object.data) index[index_value] = inventory_object end @@ -57,7 +57,7 @@ def skeletonize_primary_index(index_value) # assigned def build(attributes) attributes = {}.merge!(default_values).merge!(attributes) - fill_timestamps!(attributes) + fill_versions!(attributes) # If the primary index is already filled, we don't want populate skeletal index uuid = ::ManagerRefresh::InventoryCollection::Reference.build_stringified_reference(attributes, named_ref) @@ -79,20 +79,26 @@ def build(attributes) index[inventory_object.manager_uuid] = inventory_object end - def fill_timestamps!(attributes) + def fill_versions!(attributes) if inventory_collection.supports_resource_timestamps_max? 
&& attributes[:resource_timestamp] - # We have to symbolize, since serializing persistor makes these strings - (attributes[:resource_timestamps] ||= {}).symbolize_keys! - - (attributes.keys - inventory_collection.base_columns).each do |key| - attributes[:resource_timestamps][key] ||= attributes[:resource_timestamp] - end + fill_specific_version_attr(:resource_timestamps, :resource_timestamp, attributes) + elsif inventory_collection.supports_resource_versions_max? && attributes[:resource_version] + fill_specific_version_attr(:resource_versions, :resource_version, attributes) end end private attr_reader :primary_index + + def fill_specific_version_attr(partial_row_version_attr, full_row_version_attr, attributes) + # We have to symbolize, since serializing persistor makes these strings + (attributes[partial_row_version_attr] ||= {}).symbolize_keys! + + (attributes.keys - inventory_collection.base_columns).each do |key| + attributes[partial_row_version_attr][key] ||= attributes[full_row_version_attr] + end + end end end end diff --git a/app/models/manager_refresh/inventory_object.rb b/app/models/manager_refresh/inventory_object.rb index 96a8cdbfe61..d966c602c82 100644 --- a/app/models/manager_refresh/inventory_object.rb +++ b/app/models/manager_refresh/inventory_object.rb @@ -142,57 +142,19 @@ def assign_attributes(attributes) next if %i(resource_timestamps resource_timestamps_max resource_timestamp).include?(k) next if %i(resource_versions resource_versions_max resource_version).include?(k) - if inventory_collection.supports_resource_timestamps_max? && attributes[:resource_timestamp] && data[:resource_timestamp] - # If timestamps are in play, we will set only attributes that are newer - specific_attr_timestamp = attributes[:resource_timestamps].try(:[], k) - specific_data_timestamp = data[:resource_timestamps].try(:[], k) - - assign = if !specific_attr_timestamp - # Data have no timestamp, we will ignore the check - true - elsif specific_attr_timestamp && !specific_data_timestamp - # Data specific timestamp is nil and we have new specific timestamp - if data.key?(k) - if attributes[:resource_timestamp] >= data[:resource_timestamp] - # We can save if the full timestamp is bigger, if the data already contains the attribute - true - end - else - # Data do not contain the attribute, so we are saving the newest - true - end - true - elsif specific_attr_timestamp > specific_data_timestamp - # both partial timestamps are there, newer must be bigger - true - end - - if assign - public_send("#{k}=", v) # Attribute is newer than current one, lets use it - (data[:resource_timestamps] ||= {})[k] = specific_attr_timestamp if specific_attr_timestamp # and set the latest timestamp - end + if data[:resource_timestamp] && attributes[:resource_timestamp] + assign_only_newest(:resource_timestamp, :resource_timestamps, attributes, data, k, v) + elsif data[:resource_version] && attributes[:resource_version] + assign_only_newest(:resource_version, :resource_versions, attributes, data, k, v) else public_send("#{k}=", v) end end - if inventory_collection.supports_resource_timestamps_max? 
- if attributes[:resource_timestamp] && data[:resource_timestamp] - # If both timestamps are present, store the bigger one - data[:resource_timestamp] = attributes[:resource_timestamp] if attributes[:resource_timestamp] > data[:resource_timestamp] - elsif attributes[:resource_timestamp] && !data[:resource_timestamp] - # We are assigning timestamp that was missing - data[:resource_timestamp] = attributes[:resource_timestamp] - end - end - - if inventory_collection.supports_resource_versions_max? - if attributes[:resource_version] && data[:resource_version] - # If timestamps are present, store the bigger one - data[:resource_version] = attributes[:resource_version] if attributes[:resource_version] > data[:resource_version] - elsif attributes[:resource_version] && !data[:resource_version] - data[:resource_version] = attributes[:resource_version] - end + if attributes[:resource_timestamp] + assign_full_row_version_attr(:resource_timestamp, attributes, data) + elsif attributes[:resource_version] + assign_full_row_version_attr(:resource_version, attributes, data) end self @@ -237,6 +199,47 @@ def self.add_attributes(inventory_object_attributes) private + def assign_only_newest(full_row_version_attr, partial_row_version_attr, attributes, data, k, v) + # If timestamps are in play, we will set only attributes that are newer + specific_attr_timestamp = attributes[partial_row_version_attr].try(:[], k) + specific_data_timestamp = data[partial_row_version_attr].try(:[], k) + + assign = if !specific_attr_timestamp + # Data have no timestamp, we will ignore the check + true + elsif specific_attr_timestamp && !specific_data_timestamp + # Data specific timestamp is nil and we have new specific timestamp + if data.key?(k) + if attributes[full_row_version_attr] >= data[full_row_version_attr] + # We can save if the full timestamp is bigger, if the data already contains the attribute + true + end + else + # Data do not contain the attribute, so we are saving the newest + true + end + true + elsif specific_attr_timestamp > specific_data_timestamp + # both partial timestamps are there, newer must be bigger + true + end + + if assign + public_send("#{k}=", v) # Attribute is newer than current one, lets use it + (data[partial_row_version_attr] ||= {})[k] = specific_attr_timestamp if specific_attr_timestamp # and set the latest timestamp + end + end + + def assign_full_row_version_attr(full_row_version_attr, attributes, data) + if attributes[full_row_version_attr] && data[full_row_version_attr] + # If both timestamps are present, store the bigger one + data[full_row_version_attr] = attributes[full_row_version_attr] if attributes[full_row_version_attr] > data[full_row_version_attr] + elsif attributes[full_row_version_attr] && !data[full_row_version_attr] + # We are assigning timestamp that was missing + data[full_row_version_attr] = attributes[full_row_version_attr] + end + end + # Return true passed key representing a getter is an association # # @param inventory_collection_scope [ManagerRefresh::InventoryCollection] diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb index efb2a898750..74edc6c3fc9 100644 --- a/app/models/manager_refresh/save_collection/saver/base.rb +++ b/app/models/manager_refresh/save_collection/saver/base.rb @@ -361,15 +361,25 @@ def serializable_keys? @serializable_keys_bool_cache ||= serializable_keys.present? 
end - # @return [Boolean] true if the model_class has timestamp column + # @return [Boolean] true if the model_class has resource_timestamp column def supports_remote_data_timestamp?(all_attribute_keys) all_attribute_keys.include?(:resource_timestamp) # include? on Set is O(1) end - # @return [Boolean] true if the model_class has remote_data_timestamp column + # @return [Boolean] true if the model_class has resource_version column + def supports_remote_data_version?(all_attribute_keys) + all_attribute_keys.include?(:resource_version) # include? on Set is O(1) + end + + # @return [Boolean] true if the model_class has resource_timestamps column def supports_resource_timestamps_max? @supports_resource_timestamps_max_cache ||= inventory_collection.supports_resource_timestamps_max? end + + # @return [Boolean] true if the model_class has resource_versions column + def supports_resource_versions_max? + @supports_resource_versions_max_cache ||= inventory_collection.supports_resource_versions_max? + end end end end diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index f3389bc744d..65e615bf71b 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -167,19 +167,19 @@ def update_or_destroy_records!(records_batch_iterator, inventory_objects_index, next unless assert_referential_integrity(hash) inventory_object.id = primary_key_value - if inventory_collection.parallel_safe? && supports_remote_data_timestamp?(all_attribute_keys) - record_timestamp = record.try(:resource_timestamp) || record.try(:[], :resource_timestamp) - record_timestamps_max = record.try(:resource_timestamps_max) || record.try(:[], :resource_timestamps_max) - hash_timestamp = hash[:resource_timestamp] - - # Skip updating this record, because it is old - next if record_timestamp && hash_timestamp && record_timestamp >= hash_timestamp - - # Some column has bigger timestamp than the whole row, we need to store the row partially - if record_timestamps_max && hash_timestamp && record_timestamps_max > hash_timestamp - inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid) - next - end + if inventory_collection.parallel_safe? && + (supports_remote_data_timestamp?(all_attribute_keys) || supports_remote_data_version?(all_attribute_keys)) + + version_attr, max_version_attr = if supports_remote_data_timestamp?(all_attribute_keys) + [:resource_timestamp, :resource_timestamps_max] + elsif supports_remote_data_version?(all_attribute_keys) + [:resource_version, :resource_versions_max] + end + + next if skeletonize_or_skip_record(record.try(version_attr) || record.try(:[], version_attr), + hash[version_attr], + record.try(max_version_attr) || record.try(:[], max_version_attr), + inventory_object) end hash_for_update = if inventory_collection.use_ar_object? @@ -347,9 +347,12 @@ def create_or_update_partial_records(all_attribute_keys) skeletal_inventory_objects_index[index] = inventory_object end - if supports_resource_timestamps_max? 
+ if supports_remote_data_timestamp?(all_attribute_keys) all_attribute_keys << :resource_timestamps all_attribute_keys << :resource_timestamps_max + elsif supports_remote_data_version?(all_attribute_keys) + all_attribute_keys << :resource_versions + all_attribute_keys << :resource_versions_max end indexed_inventory_objects = {} @@ -359,16 +362,19 @@ def create_or_update_partial_records(all_attribute_keys) skeletal_inventory_objects_index.each do |index, inventory_object| hash = skeletal_attributes_index.delete(index) # Partial create or update must never set a timestamp for the whole row - if supports_resource_timestamps_max? - hash[:resource_timestamps_max] = hash.delete(:resource_timestamp) - - timestamps = {} - if hash[:resource_timestamps].present? - # Lets clean to only what we save, since when we build the skeletal object, we can set more - hash[:resource_timestamps] = hash[:resource_timestamps].slice(*all_attribute_keys) - timestamps = hash[:resource_timestamps] - end - end + timestamps = if supports_remote_data_timestamp?(all_attribute_keys) && supports_resource_timestamps_max? + assign_partial_row_version_attributes!(:resource_timestamp, + :resource_timestamps, + :resource_timestamps_max, + hash, + all_attribute_keys) + elsif supports_remote_data_version?(all_attribute_keys) && supports_resource_versions_max? + assign_partial_row_version_attributes!(:resource_version, + :resource_versions, + :resource_versions_max, + hash, + all_attribute_keys) + end # Transform hash to DB format hash = transform_to_hash!(all_attribute_keys, hash) @@ -376,7 +382,7 @@ def create_or_update_partial_records(all_attribute_keys) next unless assert_referential_integrity(hash) - hash[:__non_serialized_resource_timestampa] = timestamps # store non serialized timestamps for the partial updates + hash[:__non_serialized_versions] = timestamps # store non serialized timestamps for the partial updates hashes << hash # Index on Unique Columns values, so we can easily fill in the :id later indexed_inventory_objects[unique_index_columns.map { |x| hash[x] }] = inventory_object @@ -393,7 +399,7 @@ def create_or_update_partial_records(all_attribute_keys) # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal # precreate would be updating records with default values, that are not correct. 
- pre_filtered = hashes.select { |x| x[:resource_timestamps_max] } + pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] } # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing (all_attribute_keys - inventory_collection.base_columns).each do |column_name| @@ -401,7 +407,11 @@ def create_or_update_partial_records(all_attribute_keys) filtered.each_slice(batch_size_for_persisting) do |batch| # We need to set correct timestamps_max for this particular attribute, based on what is in timestamps - batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_resource_timestampa][column_name] if x[:__non_serialized_resource_timestampa][column_name] } + if supports_remote_data_timestamp?(all_attribute_keys) + batch.each { |x| x[:resource_timestamps_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] } + elsif supports_remote_data_version?(all_attribute_keys) + batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] } + end create_partial!(inventory_collection.base_columns + [column_name], batch, @@ -426,7 +436,7 @@ def create_or_update_partial_records(all_attribute_keys) end def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil) - result = get_connection.execute( + get_connection.execute( build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name) ) end @@ -550,6 +560,30 @@ def map_ids_to_inventory_objects(indexed_inventory_objects, all_attribute_keys, end end end + + def skeletonize_or_skip_record(record_version, hash_version, record_versions_max, inventory_object) + # Skip updating this record, because it is old + return true if record_version && hash_version && record_version >= hash_version + + # Some column has bigger version than the whole row, we need to store the row partially + if record_versions_max && hash_version && record_versions_max > hash_version + inventory_collection.skeletal_primary_index.skeletonize_primary_index(inventory_object.manager_uuid) + return true + end + + false + end + + def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_version_attr, + partial_row_version_attr_max, hash, all_attribute_keys) + hash[partial_row_version_attr_max] = hash.delete(full_row_version_attr) + + if hash[partial_row_version_attr].present? + # Lets clean to only what we save, since when we build the skeletal object, we can set more + # TODO(lsmola) should this be without internal columns? 
Add spec + hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys) + end + end end end end diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index ed85a5e0c62..8baab47a230 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -25,7 +25,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu connection = get_connection ignore_cols = if mode == :partial - [:resource_timestamp] + [:resource_timestamp, :resource_version] elsif mode == :full [] end @@ -59,7 +59,7 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu } ignore_cols += if mode == :partial - [:resource_timestamps, :resource_timestamps_max] + [:resource_timestamps, :resource_timestamps_max, :resource_versions, :resource_versions_max] elsif mode == :full [] end @@ -78,32 +78,20 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} } if supports_remote_data_timestamp?(all_attribute_keys) - insert_query += %{ - , resource_timestamps = '{}', resource_timestamps_max = NULL - - WHERE EXCLUDED.resource_timestamp IS NULL OR ( - (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamp > #{table_name}.resource_timestamp) AND - (#{table_name}.resource_timestamps_max IS NULL OR EXCLUDED.resource_timestamp >= #{table_name}.resource_timestamps_max) - ) - } + insert_query += full_update_condition(:resource_timestamp) + elsif supports_remote_data_version?(all_attribute_keys) + insert_query += full_update_condition(:resource_version) end elsif mode == :partial raise "Column name not defined for #{hashes}" unless column_name insert_query += %{ - SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} + SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} } - if supports_resource_timestamps_max? 
- insert_query += %{ - , resource_timestamps = #{table_name}.resource_timestamps || ('{"#{column_name}": "' || EXCLUDED.resource_timestamps_max::timestamp || '"}')::jsonb - , resource_timestamps_max = greatest(#{table_name}.resource_timestamps_max::timestamp, EXCLUDED.resource_timestamps_max::timestamp) - WHERE EXCLUDED.resource_timestamps_max IS NULL OR ( - (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamps_max > #{table_name}.resource_timestamp) AND ( - (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp IS NULL OR - EXCLUDED.resource_timestamps_max::timestamp > (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp - ) - ) - } + if supports_remote_data_timestamp?(all_attribute_keys) + insert_query += partial_update_condition(:resource_timestamp, column_name) + elsif supports_remote_data_version?(all_attribute_keys) + insert_query += partial_update_condition(:resource_version, column_name) end end end @@ -118,6 +106,54 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu insert_query end + def full_update_condition(full_row_version_attr) + if full_row_version_attr == :resource_timestamp + %{ + , resource_timestamps = '{}', resource_timestamps_max = NULL + + WHERE EXCLUDED.resource_timestamp IS NULL OR ( + (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamp > #{table_name}.resource_timestamp) AND + (#{table_name}.resource_timestamps_max IS NULL OR EXCLUDED.resource_timestamp >= #{table_name}.resource_timestamps_max) + ) + } + elsif full_row_version_attr == :resource_version + %{ + , resource_versions = '{}', resource_versions_max = NULL + + WHERE EXCLUDED.resource_version IS NULL OR ( + (#{table_name}.resource_version IS NULL OR EXCLUDED.resource_version > #{table_name}.resource_version) AND + (#{table_name}.resource_versions_max IS NULL OR EXCLUDED.resource_version >= #{table_name}.resource_versions_max) + ) + } + end + end + + def partial_update_condition(full_row_version_attr, column_name) + if full_row_version_attr == :resource_timestamp + %{ + , resource_timestamps = #{table_name}.resource_timestamps || ('{"#{column_name}": "' || EXCLUDED.resource_timestamps_max::timestamp || '"}')::jsonb + , resource_timestamps_max = greatest(#{table_name}.resource_timestamps_max::timestamp, EXCLUDED.resource_timestamps_max::timestamp) + WHERE EXCLUDED.resource_timestamps_max IS NULL OR ( + (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamps_max > #{table_name}.resource_timestamp) AND ( + (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp IS NULL OR + EXCLUDED.resource_timestamps_max::timestamp > (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp + ) + ) + } + elsif full_row_version_attr == :resource_version + %{ + , resource_versions = #{table_name}.resource_versions || ('{"#{column_name}": "' || EXCLUDED.resource_versions_max::integer || '"}')::jsonb + , resource_versions_max = greatest(#{table_name}.resource_versions_max::integer, EXCLUDED.resource_versions_max::integer) + WHERE EXCLUDED.resource_versions_max IS NULL OR ( + (#{table_name}.resource_version IS NULL OR EXCLUDED.resource_versions_max > #{table_name}.resource_version) AND ( + (#{table_name}.resource_versions->>'#{column_name}')::integer IS NULL OR + EXCLUDED.resource_versions_max::integer > (#{table_name}.resource_versions->>'#{column_name}')::integer + ) + ) + } + end + end + # Builds update clause for one column identified by the passed key # # @param key [Symbol] key that is column 
name @@ -164,7 +200,11 @@ def build_update_query(all_attribute_keys, hashes) if supports_remote_data_timestamp?(all_attribute_keys) # Full row update will reset the partial update timestamps update_query += %{ - , resource_timestamps = '{}', resource_timestamps_max = NULL + , resource_timestamps = '{}', resource_timestamps_max = NULL + } + elsif supports_remote_data_version?(all_attribute_keys) + update_query += %{ + , resource_versions = '{}', resource_versions_max = NULL } end @@ -191,6 +231,15 @@ def build_update_query(all_attribute_keys, hashes) ) ) } + elsif supports_remote_data_version?(all_attribute_keys) + update_query += %{ + AND ( + updated_values.resource_version IS NULL OR ( + (#{table_name}.resource_version IS NULL OR updated_values.resource_version > #{table_name}.resource_version) AND + (#{table_name}.resource_versions_max IS NULL OR updated_values.resource_version >= #{table_name}.resource_versions_max) + ) + ) + } end if inventory_collection.parallel_safe? From 853ce8bc16c9a5be133e00773dcc9dfac4fe7e75 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Wed, 15 Aug 2018 11:04:49 +0200 Subject: [PATCH 21/28] Fill correct created and updated result of IC Fill correct created and updated result of IC --- .../manager_refresh/inventory_collection.rb | 18 ++++++-- .../saver/concurrent_safe_batch.rb | 46 ++++++++++++++----- .../save_collection/saver/sql_helper.rb | 10 ++++ 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection.rb b/app/models/manager_refresh/inventory_collection.rb index 659a7442bb4..6e58286bc3a 100644 --- a/app/models/manager_refresh/inventory_collection.rb +++ b/app/models/manager_refresh/inventory_collection.rb @@ -688,12 +688,8 @@ def unique_index_for(keys) def internal_columns return @internal_columns if @internal_columns - @internal_columns = [] + @internal_columns = internal_timestamp_columns @internal_columns << :type if supports_sti? - @internal_columns << :created_on if supports_created_on? - @internal_columns << :created_at if supports_created_at? - @internal_columns << :updated_on if supports_updated_on? - @internal_columns << :updated_at if supports_updated_at? @internal_columns << :resource_timestamps_max if supports_resource_timestamps_max? @internal_columns << :resource_timestamps if supports_resource_timestamps? @internal_columns << :resource_timestamp if supports_resource_timestamp? @@ -703,6 +699,18 @@ def internal_columns @internal_columns end + def internal_timestamp_columns + return @internal_timestamp_columns if @internal_timestamp_columns + + @internal_timestamp_columns = [] + @internal_timestamp_columns << :created_on if supports_created_on? + @internal_timestamp_columns << :created_at if supports_created_at? + @internal_timestamp_columns << :updated_on if supports_updated_on? + @internal_timestamp_columns << :updated_at if supports_updated_at? 
+ + @internal_timestamp_columns + end + def base_columns @base_columns ||= unique_index_columns + internal_columns end diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index 65e615bf71b..c6b1f7533b2 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -392,15 +392,17 @@ def create_or_update_partial_records(all_attribute_keys) # First, lets try to create all partial records hashes.each_slice(batch_size_for_persisting) do |batch| - create_partial!(all_attribute_keys, - batch, - :on_conflict => :do_nothing) + result = create_partial!(all_attribute_keys, + batch, + :on_conflict => :do_nothing) + inventory_collection.store_created_records(result) end # We need only skeletal records with timestamp. We can't save the ones without timestamp, because e.g. skeletal # precreate would be updating records with default values, that are not correct. pre_filtered = hashes.select { |x| x[:resource_timestamps_max] || x[:resource_versions_max] } + results = {} # TODO(lsmola) we don't need to process rows that were save by the create -> oncoflict do nothing (all_attribute_keys - inventory_collection.base_columns).each do |column_name| filtered = pre_filtered.select { |x| x.key?(column_name) } @@ -413,16 +415,18 @@ def create_or_update_partial_records(all_attribute_keys) batch.each { |x| x[:resource_versions_max] = x[:__non_serialized_versions][column_name] if x[:__non_serialized_versions][column_name] } end - create_partial!(inventory_collection.base_columns + [column_name], - batch, - :on_conflict => :do_update, - :column_name => column_name) + result = create_partial!(inventory_collection.base_columns + [column_name], + batch, + :on_conflict => :do_update, + :column_name => column_name) + result.each do |result| + results[result["id"]] = result + end end end - # TODO(lsmola) we need to recognize what records were created and what records were updated. Will we take - # skeletal records as created? The creation should be having :complete => true - # inventory_collection.store_created_records(result) + inventory_collection.store_updated_records(results.values) + # TODO(lsmola) we need to move here the hash loading ar object etc. otherwise the lazy_find with key will not # be correct if inventory_collection.dependees.present? @@ -483,7 +487,27 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni ) # TODO(lsmola) so for all of the lazy_find with key, we will need to fetch a fresh attributes from the # db, otherwise we will be getting attribute that might not have been updated (add spec) - inventory_collection.store_created_records(result) + if inventory_collection.parallel_safe? + # We've done upsert, so records were either created or update. We can recognize that by checking if + # created and updated timestamps are the same + created_attr = "created_on" if inventory_collection.supports_created_on? + created_attr ||= "created_at" if inventory_collection.supports_created_at? + updated_attr = "updated_on" if inventory_collection.supports_updated_on? + updated_attr ||= "updated_at" if inventory_collection.supports_updated_at? 
+ + if created_attr && updated_attr + created, updated = result.to_a.partition { |x| x[created_attr] == x[updated_attr] } + inventory_collection.store_created_records(created) + inventory_collection.store_updated_records(updated) + else + # The record doesn't have both created and updated attrs, so we'll take all as created + inventory_collection.store_created_records(result) + end + else + # We've done just insert, so all records were created + inventory_collection.store_created_records(result) + end + if inventory_collection.dependees.present? # We need to get primary keys of the created objects, but only if there are dependees that would use them map_ids_to_inventory_objects(indexed_inventory_objects, diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index 8baab47a230..a9a06b6256d 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -101,6 +101,16 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu RETURNING "id",#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")} } + if inventory_collection.parallel_safe? + # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what + # updated + if inventory_collection.internal_timestamp_columns.present? + insert_query += %{ + ,#{inventory_collection.internal_timestamp_columns.join(",")} + } + end + end + _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...Complete") insert_query From fe6c581fbb18c4dfce19a4459e7b71cfce4edbd1 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Wed, 15 Aug 2018 15:14:50 +0200 Subject: [PATCH 22/28] Cleanup TODos ans YARD Cleanup TODos ans YARD --- .../manager_refresh/inventory_collection.rb | 2 +- .../index/type/skeletal.rb | 23 ++++++++----- .../manager_refresh/inventory_object.rb | 17 ++++++++++ .../saver/concurrent_safe_batch.rb | 34 ++++++++++++++----- 4 files changed, 58 insertions(+), 18 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection.rb b/app/models/manager_refresh/inventory_collection.rb index 6e58286bc3a..e86b8576420 100644 --- a/app/models/manager_refresh/inventory_collection.rb +++ b/app/models/manager_refresh/inventory_collection.rb @@ -688,7 +688,7 @@ def unique_index_for(keys) def internal_columns return @internal_columns if @internal_columns - @internal_columns = internal_timestamp_columns + @internal_columns = [] + internal_timestamp_columns @internal_columns << :type if supports_sti? @internal_columns << :resource_timestamps_max if supports_resource_timestamps_max? @internal_columns << :resource_timestamps if supports_resource_timestamps? 
diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb index 7261ddd1d5d..5b68bf7ad6a 100644 --- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb +++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb @@ -38,6 +38,7 @@ def delete(index_value) end # Takes value from primary_index and puts it to skeletal index + # # @param index_value [String] a index_value of the InventoryObject we search for # @return [InventoryObject|nil] Returns found value or nil def skeletonize_primary_index(index_value) @@ -62,15 +63,11 @@ def build(attributes) # If the primary index is already filled, we don't want populate skeletal index uuid = ::ManagerRefresh::InventoryCollection::Reference.build_stringified_reference(attributes, named_ref) if (inventory_object = primary_index.find(uuid)) - # TODO(lsmola) add timestamp check? If timestamps are present, we should assign the data, only if they - # have newer timestamp return inventory_object.assign_attributes(attributes) end # Return if skeletal index already exists if (inventory_object = index[uuid]) - # TODO(lsmola) add timestamp check? If timestamps are present, we should assign the data, only if they - # have newer timestamp return inventory_object.assign_attributes(attributes) end @@ -79,6 +76,13 @@ def build(attributes) index[inventory_object.manager_uuid] = inventory_object end + private + + attr_reader :primary_index + + # Add versions columns into the passed attributes + # + # @param attributes [Hash] Attributes we want to extend with version related attributes def fill_versions!(attributes) if inventory_collection.supports_resource_timestamps_max? && attributes[:resource_timestamp] fill_specific_version_attr(:resource_timestamps, :resource_timestamp, attributes) @@ -87,10 +91,13 @@ def fill_versions!(attributes) end end - private - - attr_reader :primary_index - + # Add specific versions columns into the passed attributes + # + # @param partial_row_version_attr [Symbol] Attr name for partial rows, allowed values are + # [:resource_timestamps, :resource_versions] + # @param full_row_version_attr [Symbol] Attr name for full rows, allowed values are + # [:resource_timestamp, :resource_version] + # @param attributes [Hash] Attributes we want to extend with version related attributes def fill_specific_version_attr(partial_row_version_attr, full_row_version_attr, attributes) # We have to symbolize, since serializing persistor makes these strings (attributes[partial_row_version_attr] ||= {}).symbolize_keys! diff --git a/app/models/manager_refresh/inventory_object.rb b/app/models/manager_refresh/inventory_object.rb index d966c602c82..2395e026310 100644 --- a/app/models/manager_refresh/inventory_object.rb +++ b/app/models/manager_refresh/inventory_object.rb @@ -199,6 +199,17 @@ def self.add_attributes(inventory_object_attributes) private + # Assigns value based on the version attributes. If versions are specified, it asigns attribute only if it's + # newer than existing attribute. 
+ # + # @param full_row_version_attr [Symbol] Attr name for full rows, allowed values are + # [:resource_timestamp, :resource_version] + # @param partial_row_version_attr [Symbol] Attr name for partial rows, allowed values are + # [:resource_timestamps, :resource_versions] + # @param attributes [Hash] New attributes we are assigning + # @param data [Hash] Existing attributes of the InventoryObject + # @param k [Symbol] Name of the attribute we are assigning + # @param v [Object] Value of the attribute we are assigning def assign_only_newest(full_row_version_attr, partial_row_version_attr, attributes, data, k, v) # If timestamps are in play, we will set only attributes that are newer specific_attr_timestamp = attributes[partial_row_version_attr].try(:[], k) @@ -230,6 +241,12 @@ def assign_only_newest(full_row_version_attr, partial_row_version_attr, attribut end end + # Assigns attribute representing version of the whole row + # + # @param full_row_version_attr [Symbol] Attr name for full rows, allowed values are + # [:resource_timestamp, :resource_version] + # @param attributes [Hash] New attributes we are assigning + # @param data [Hash] Existing attributes of the InventoryObject def assign_full_row_version_attr(full_row_version_attr, attributes, data) if attributes[full_row_version_attr] && data[full_row_version_attr] # If both timestamps are present, store the bigger one diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb index c6b1f7533b2..daabc5ffc61 100644 --- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb +++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb @@ -258,7 +258,6 @@ def format_value(attribute, value) end end - # Deletes or sof-deletes records. If the model_class supports a custom class delete method, we will use it for # batch soft-delete. # @@ -317,6 +316,13 @@ def update_records!(all_attribute_keys, hashes, indexed_inventory_objects) result end + # Taking result from update or upsert of the row. The records that were not saved will be turned into skeletal + # records and we will save them attribute by attribute. + # + # @param hash [Hash{String => InventoryObject}>] Hash with indexed data we want to save + # @param result [Array] Result from the DB containing the data that were actually saved + # @param all_unique_columns [Boolean] True if index is consisted from all columns of the unique index. False if + # index is just made from manager_ref turned in DB column names. def skeletonize_ignored_records!(hash, result, all_unique_columns: false) updated = if all_unique_columns result.map { |x| unique_index_columns_to_s.map { |key| x[key] } } @@ -326,16 +332,20 @@ def skeletonize_ignored_records!(hash, result, all_unique_columns: false) updated.each { |x| hash.delete(x) } - # Now lets skeletonize all inventory_objects that were not saved - # TODO(lsmola) we should compare timestamps and ignore InventoryObjects or attributes that are old, to lower - # what we send as upsert in skeletal index + # Now lets skeletonize all inventory_objects that were not saved by update or upsert. Old rows that can't be + # saved are not being sent here. We have only rows that are new, but become old as we send the query (so other + # parallel process saved the data in the meantime). Or if some attributes are newer than the whole row + # being sent. 
hash.keys.each do |db_index| inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid) end end + # Saves partial records using upsert, taking records from skeletal_primary_index. This is used both for + # skeletal precreate as well as for saving partial rows. + # + # @param all_attribute_keys [Set] Superset of all keys of all records being saved def create_or_update_partial_records(all_attribute_keys) - # We will create also remaining skeletal records skeletal_attributes_index = {} skeletal_inventory_objects_index = {} @@ -439,6 +449,14 @@ def create_or_update_partial_records(all_attribute_keys) end end + # Batch upserts 1 data column of the row, plus the internal columns + # + # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row + # @param hashes [Array] Array of InventoryObject objects we will be inserting + # into the DB + # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values + # are :do_update, :do_nothing, nil + # @param column_name [Symbol] Name of the data column we will be upserting def create_partial!(all_attribute_keys, hashes, on_conflict: nil, column_name: nil) get_connection.execute( build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :partial, :column_name => column_name) @@ -485,10 +503,9 @@ def create_records!(all_attribute_keys, batch, attributes_index, on_conflict: ni result = get_connection.execute( build_insert_query(all_attribute_keys, hashes, :on_conflict => on_conflict, :mode => :full) ) - # TODO(lsmola) so for all of the lazy_find with key, we will need to fetch a fresh attributes from the - # db, otherwise we will be getting attribute that might not have been updated (add spec) + if inventory_collection.parallel_safe? - # We've done upsert, so records were either created or update. We can recognize that by checking if + # We've done upsert, so records were either created or updated. We can recognize that by checking if # created and updated timestamps are the same created_attr = "created_on" if inventory_collection.supports_created_on? created_attr ||= "created_at" if inventory_collection.supports_created_at? @@ -604,7 +621,6 @@ def assign_partial_row_version_attributes!(full_row_version_attr, partial_row_ve if hash[partial_row_version_attr].present? # Lets clean to only what we save, since when we build the skeletal object, we can set more - # TODO(lsmola) should this be without internal columns? 
Add spec hash[partial_row_version_attr] = hash[partial_row_version_attr].slice(*all_attribute_keys) end end From 512a7e7afd9a542f3181da6cdb11dc9a49b95370 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Wed, 15 Aug 2018 16:40:05 +0200 Subject: [PATCH 23/28] Cleanup sql_helper and separate to upsert and update mixins Cleanup sql_helper and separate to upsert and update mixins --- .../save_collection/saver/sql_helper.rb | 248 +----------------- .../saver/sql_helper_update.rb | 112 ++++++++ .../saver/sql_helper_upsert.rb | 182 +++++++++++++ 3 files changed, 298 insertions(+), 244 deletions(-) create mode 100644 app/models/manager_refresh/save_collection/saver/sql_helper_update.rb create mode 100644 app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper.rb b/app/models/manager_refresh/save_collection/saver/sql_helper.rb index a9a06b6256d..15858923c75 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper.rb @@ -4,172 +4,11 @@ module SqlHelper # TODO(lsmola) all below methods should be rewritten to arel, but we need to first extend arel to be able to do # this - # Builds ON CONFLICT UPDATE updating branch for one column identified by the passed key - # - # @param key [Symbol] key that is column name - # @return [String] SQL clause for upserting one column - def build_insert_set_cols(key) - "#{quote_column_name(key)} = EXCLUDED.#{quote_column_name(key)}" - end - - # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row - # @param hashes [Array] data used for building a batch insert sql query - # @param mode [Symbol] Mode for saving, allowed values are [:full, :partial], :full is when we save all - # columns of a row, :partial is when we save only few columns, so a partial row. - # @param on_conflict [Symbol, NilClass] defines behavior on conflict with unique index constraint, allowed values - # are :do_update, :do_nothing, nil - def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, column_name: nil) - _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...") - - # Cache the connection for the batch - connection = get_connection - - ignore_cols = if mode == :partial - [:resource_timestamp, :resource_version] - elsif mode == :full - [] - end - - # Make sure we don't send a primary_key for INSERT in any form, it could break PG sequencer - all_attribute_keys_array = all_attribute_keys.to_a - [primary_key.to_s, primary_key.to_sym] - ignore_cols - values = hashes.map do |hash| - "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x) }.join(",")})" - end.join(",") - col_names = all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",") - - insert_query = %{ - INSERT INTO #{table_name} (#{col_names}) - VALUES - #{values} - } - - if inventory_collection.parallel_safe? - if on_conflict == :do_nothing - insert_query += %{ - ON CONFLICT DO NOTHING - } - elsif on_conflict == :do_update - index_where_condition = unique_index_for(unique_index_keys).where - where_to_sql = index_where_condition ? 
"WHERE #{index_where_condition}" : "" - - insert_query += %{ - ON CONFLICT (#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}) #{where_to_sql} - DO - UPDATE - } - - ignore_cols += if mode == :partial - [:resource_timestamps, :resource_timestamps_max, :resource_versions, :resource_versions_max] - elsif mode == :full - [] - end - ignore_cols += [:created_on, :created_at] # Lets not change created_at for the update clause - - # TODO(lsmola) do we want to exclude the ems_id from the UPDATE clause? Otherwise it might be difficult to change - # the ems_id as a cross manager migration, since ems_id should be there as part of the insert. The attempt of - # changing ems_id could lead to putting it back by a refresh. - # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without a - # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute + extend ActiveSupport::Concern - # This conditional will avoid rewriting new data by old data. But we want it only when remote_data_timestamp is a - # part of the data, since for the fake records, we just want to update ems_ref. - if mode == :full - insert_query += %{ - SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} - } - if supports_remote_data_timestamp?(all_attribute_keys) - insert_query += full_update_condition(:resource_timestamp) - elsif supports_remote_data_version?(all_attribute_keys) - insert_query += full_update_condition(:resource_version) - end - elsif mode == :partial - raise "Column name not defined for #{hashes}" unless column_name - - insert_query += %{ - SET #{(all_attribute_keys_array - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")} - } - if supports_remote_data_timestamp?(all_attribute_keys) - insert_query += partial_update_condition(:resource_timestamp, column_name) - elsif supports_remote_data_version?(all_attribute_keys) - insert_query += partial_update_condition(:resource_version, column_name) - end - end - end - end - - insert_query += %{ - RETURNING "id",#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")} - } - - if inventory_collection.parallel_safe? - # For upsert, we'll return also created and updated timestamps, so we can recognize what was created and what - # updated - if inventory_collection.internal_timestamp_columns.present? 
- insert_query += %{ - ,#{inventory_collection.internal_timestamp_columns.join(",")} - } - end - end - - _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...Complete") - - insert_query - end - - def full_update_condition(full_row_version_attr) - if full_row_version_attr == :resource_timestamp - %{ - , resource_timestamps = '{}', resource_timestamps_max = NULL - - WHERE EXCLUDED.resource_timestamp IS NULL OR ( - (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamp > #{table_name}.resource_timestamp) AND - (#{table_name}.resource_timestamps_max IS NULL OR EXCLUDED.resource_timestamp >= #{table_name}.resource_timestamps_max) - ) - } - elsif full_row_version_attr == :resource_version - %{ - , resource_versions = '{}', resource_versions_max = NULL - - WHERE EXCLUDED.resource_version IS NULL OR ( - (#{table_name}.resource_version IS NULL OR EXCLUDED.resource_version > #{table_name}.resource_version) AND - (#{table_name}.resource_versions_max IS NULL OR EXCLUDED.resource_version >= #{table_name}.resource_versions_max) - ) - } - end - end - - def partial_update_condition(full_row_version_attr, column_name) - if full_row_version_attr == :resource_timestamp - %{ - , resource_timestamps = #{table_name}.resource_timestamps || ('{"#{column_name}": "' || EXCLUDED.resource_timestamps_max::timestamp || '"}')::jsonb - , resource_timestamps_max = greatest(#{table_name}.resource_timestamps_max::timestamp, EXCLUDED.resource_timestamps_max::timestamp) - WHERE EXCLUDED.resource_timestamps_max IS NULL OR ( - (#{table_name}.resource_timestamp IS NULL OR EXCLUDED.resource_timestamps_max > #{table_name}.resource_timestamp) AND ( - (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp IS NULL OR - EXCLUDED.resource_timestamps_max::timestamp > (#{table_name}.resource_timestamps->>'#{column_name}')::timestamp - ) - ) - } - elsif full_row_version_attr == :resource_version - %{ - , resource_versions = #{table_name}.resource_versions || ('{"#{column_name}": "' || EXCLUDED.resource_versions_max::integer || '"}')::jsonb - , resource_versions_max = greatest(#{table_name}.resource_versions_max::integer, EXCLUDED.resource_versions_max::integer) - WHERE EXCLUDED.resource_versions_max IS NULL OR ( - (#{table_name}.resource_version IS NULL OR EXCLUDED.resource_versions_max > #{table_name}.resource_version) AND ( - (#{table_name}.resource_versions->>'#{column_name}')::integer IS NULL OR - EXCLUDED.resource_versions_max::integer > (#{table_name}.resource_versions->>'#{column_name}')::integer - ) - ) - } - end - end - - # Builds update clause for one column identified by the passed key - # - # @param key [Symbol] key that is column name - # @return [String] SQL clause for updating one column - def build_update_set_cols(key) - "#{quote_column_name(key)} = updated_values.#{quote_column_name(key)}" + included do + include SqlHelperUpsert + include SqlHelperUpdate end # Returns quoted column name @@ -184,85 +23,6 @@ def get_connection ActiveRecord::Base.connection end - # Build batch update query - # - # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row - # @param hashes [Array] data used for building a batch update sql query - def build_update_query(all_attribute_keys, hashes) - _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...") - # Cache the connection for the batch - connection = get_connection - - # We want to ignore create timestamps when updating - 
all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i(created_at created_on).include?(x) } - all_attribute_keys_array << :id - - values = hashes.map! do |hash| - "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, true) }.join(",")})" - end.join(",") - - update_query = %{ - UPDATE #{table_name} - SET - #{all_attribute_keys_array.map { |key| build_update_set_cols(key) }.join(",")} - } - - if supports_remote_data_timestamp?(all_attribute_keys) - # Full row update will reset the partial update timestamps - update_query += %{ - , resource_timestamps = '{}', resource_timestamps_max = NULL - } - elsif supports_remote_data_version?(all_attribute_keys) - update_query += %{ - , resource_versions = '{}', resource_versions_max = NULL - } - end - - update_query += %{ - FROM ( - VALUES - #{values} - ) AS updated_values (#{all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",")}) - WHERE updated_values.id = #{table_name}.id - } - - # TODO(lsmola) do we want to exclude the ems_id from the UPDATE clause? Otherwise it might be difficult to change - # the ems_id as a cross manager migration, since ems_id should be there as part of the insert. The attempt of - # changing ems_id could lead to putting it back by a refresh. - - # This conditional will avoid rewriting new data by old data. But we want it only when remote_data_timestamp is a - # part of the data, since for the fake records, we just want to update ems_ref. - if supports_remote_data_timestamp?(all_attribute_keys) - update_query += %{ - AND ( - updated_values.resource_timestamp IS NULL OR ( - (#{table_name}.resource_timestamp IS NULL OR updated_values.resource_timestamp > #{table_name}.resource_timestamp) AND - (#{table_name}.resource_timestamps_max IS NULL OR updated_values.resource_timestamp >= #{table_name}.resource_timestamps_max) - ) - ) - } - elsif supports_remote_data_version?(all_attribute_keys) - update_query += %{ - AND ( - updated_values.resource_version IS NULL OR ( - (#{table_name}.resource_version IS NULL OR updated_values.resource_version > #{table_name}.resource_version) AND - (#{table_name}.resource_versions_max IS NULL OR updated_values.resource_version >= #{table_name}.resource_versions_max) - ) - ) - } - end - - if inventory_collection.parallel_safe? 
- update_query += %{ - RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")} - } - end - - _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...Complete") - - update_query - end - # Builds a multiselection conditions like (table1.a = a1 AND table2.b = b1) OR (table1.a = a2 AND table2.b = b2) # # @param hashes [Array] data we want to use for the query diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb b/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb new file mode 100644 index 00000000000..3bbd51e887a --- /dev/null +++ b/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb @@ -0,0 +1,112 @@ +module ManagerRefresh::SaveCollection + module Saver + module SqlHelperUpdate + # Builds update clause for one column identified by the passed key + # + # @param key [Symbol] key that is column name + # @return [String] SQL clause for updating one column + def build_update_set_cols(key) + "#{quote_column_name(key)} = updated_values.#{quote_column_name(key)}" + end + + # Build batch update query + # + # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row + # @param hashes [Array] data used for building a batch update sql query + def build_update_query(all_attribute_keys, hashes) + _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...") + # Cache the connection for the batch + connection = get_connection + + # We want to ignore create timestamps when updating + all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i(created_at created_on).include?(x) } + all_attribute_keys_array << :id + + update_query = update_query_beginning(all_attribute_keys_array) + update_query += update_query_reset_version_columns(all_attribute_keys) + update_query += update_query_from_values(hashes, all_attribute_keys_array, connection) + update_query += update_query_version_conditions(all_attribute_keys) + update_query += update_query_returning + + _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...Complete") + + update_query + end + + private + + def update_query_beginning(all_attribute_keys_array) + <<-SQL + UPDATE #{table_name} + SET + #{all_attribute_keys_array.map { |key| build_update_set_cols(key) }.join(",")} + SQL + end + + def update_query_reset_version_columns(all_attribute_keys) + if supports_remote_data_timestamp?(all_attribute_keys) + # Full row update will reset the partial update timestamps + <<-SQL + , resource_timestamps = '{}', resource_timestamps_max = NULL + SQL + elsif supports_remote_data_version?(all_attribute_keys) + <<-SQL + , resource_versions = '{}', resource_versions_max = NULL + SQL + else + "" + end + end + + def update_query_from_values(hashes, all_attribute_keys_array, connection) + values = hashes.map! do |hash| + "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x, true) }.join(",")})" + end.join(",") + + <<-SQL + FROM ( + VALUES + #{values} + ) AS updated_values (#{all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",")}) + WHERE updated_values.id = #{table_name}.id + SQL + end + + def update_query_version_conditions(all_attribute_keys) + # This conditional will avoid rewriting new data by old data. 
+        # a part of the data, since for the fake records, we just want to update ems_ref.
+        if supports_remote_data_timestamp?(all_attribute_keys)
+          <<-SQL
+            AND (
+              updated_values.resource_timestamp IS NULL OR (
+                (#{table_name}.resource_timestamp IS NULL OR updated_values.resource_timestamp > #{table_name}.resource_timestamp) AND
+                (#{table_name}.resource_timestamps_max IS NULL OR updated_values.resource_timestamp >= #{table_name}.resource_timestamps_max)
+              )
+            )
+          SQL
+        elsif supports_remote_data_version?(all_attribute_keys)
+          <<-SQL
+            AND (
+              updated_values.resource_version IS NULL OR (
+                (#{table_name}.resource_version IS NULL OR updated_values.resource_version > #{table_name}.resource_version) AND
+                (#{table_name}.resource_versions_max IS NULL OR updated_values.resource_version >= #{table_name}.resource_versions_max)
+              )
+            )
+          SQL
+        else
+          ""
+        end
+      end
+
+      def update_query_returning
+        if inventory_collection.parallel_safe?
+          <<-SQL
+            RETURNING updated_values.#{quote_column_name("id")}, #{unique_index_columns.map { |x| "updated_values.#{quote_column_name(x)}" }.join(",")}
+          SQL
+        else
+          ""
+        end
+      end
+    end
+  end
+end
diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb
new file mode 100644
index 00000000000..7631f07de59
--- /dev/null
+++ b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb
@@ -0,0 +1,182 @@
+module ManagerRefresh::SaveCollection
+  module Saver
+    module SqlHelperUpsert
+      # Builds the ON CONFLICT ... DO UPDATE branch for one column identified by the passed key
+      #
+      # @param key [Symbol] key that is a column name
+      # @return [String] SQL clause for upserting one column
+      def build_insert_set_cols(key)
+        "#{quote_column_name(key)} = EXCLUDED.#{quote_column_name(key)}"
+      end
+
+      # @param all_attribute_keys [Array] Array of all columns we will be saving into each table row
+      # @param hashes [Array] data used for building a batch insert sql query
+      # @param mode [Symbol] Mode for saving, allowed values are [:full, :partial], :full is when we save all
+      #        columns of a row, :partial is when we save only a few columns, so a partial row.
+      # @param on_conflict [Symbol, NilClass] defines behavior on conflict with a unique index constraint, allowed
+      #        values are :do_update, :do_nothing, nil
+      def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, column_name: nil)
+        _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...")
+
+        # Cache the connection for the batch
+        connection = get_connection
+        # Ignore versioning columns that are set separately
+        ignore_cols = mode == :partial ? [:resource_timestamp, :resource_version] : []
+        # Make sure we don't send a primary_key for INSERT in any form, as it could break the PostgreSQL sequence
+        all_attribute_keys_array = all_attribute_keys.to_a - [primary_key.to_s, primary_key.to_sym] - ignore_cols
+
+        insert_query = insert_query_insert_values(hashes, all_attribute_keys_array, connection)
+        insert_query += insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
+        insert_query += insert_query_returning
+
+        _log.debug("Building insert query for #{inventory_collection} of size #{inventory_collection.size}...Complete")
+
+        insert_query
+      end
+
+      private
+
+      def insert_query_insert_values(hashes, all_attribute_keys_array, connection)
+        values = hashes.map do |hash|
+            "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x) }.join(",")})"
+        end.join(",")
+        col_names = all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",")
+
+        <<-SQL
+          INSERT INTO #{table_name} (#{col_names})
+            VALUES
+              #{values}
+        SQL
+      end
+
+      def insert_query_on_conflict_behavior(all_attribute_keys, on_conflict, mode, ignore_cols, column_name)
+        return "" unless inventory_collection.parallel_safe?
+
+        insert_query_on_conflict = insert_query_on_conflict_do(on_conflict)
+        if on_conflict == :do_update
+          insert_query_on_conflict += insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
+        end
+        insert_query_on_conflict
+      end
+
+      def insert_query_on_conflict_do(on_conflict)
+        if on_conflict == :do_nothing
+          <<-SQL
+            ON CONFLICT DO NOTHING
+          SQL
+        elsif on_conflict == :do_update
+          index_where_condition = unique_index_for(unique_index_keys).where
+          where_to_sql          = index_where_condition ? "WHERE #{index_where_condition}" : ""
+
+          <<-SQL
+            ON CONFLICT (#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}) #{where_to_sql}
+              DO
+                UPDATE
+          SQL
+        end
+      end
+
+      def insert_query_on_conflict_update(all_attribute_keys, mode, ignore_cols, column_name)
+        if mode == :partial
+          ignore_cols += [:resource_timestamps, :resource_timestamps_max, :resource_versions, :resource_versions_max]
+        end
+        ignore_cols += [:created_on, :created_at] # Let's not change created timestamps in the update clause
+
+        # If there is no version attribute, the update part will be ignored below
+        version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
+                              :resource_timestamp
+                            elsif supports_remote_data_version?(all_attribute_keys)
+                              :resource_version
+                            end
+
+        # TODO(lsmola) should we add :deleted => false to the update clause? That should handle a reconnect, without
+        # a need to list :deleted anywhere in the parser. We just need to check that a model has the :deleted attribute
+        query = <<-SQL
+          SET #{(all_attribute_keys - ignore_cols).map { |key| build_insert_set_cols(key) }.join(", ")}
+        SQL
+
+        # This condition makes sure we avoid overwriting new data with old data. But we want it only when
+        # remote_data_timestamp is a part of the data.
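+        # In :full mode the condition built below compares EXCLUDED.resource_timestamp with the row's stored
+        # resource_timestamp and resource_timestamps_max; in :partial mode it checks the per-column value kept in
+        # resource_timestamps. Either way, a batch carrying older data degrades to a no-op instead of overwriting
+        # fresher rows (resource_version works analogously).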
+        query += insert_query_on_conflict_update_mode(mode, version_attribute, column_name) if version_attribute
+        query
+      end
+
+      def insert_query_on_conflict_update_mode(mode, version_attribute, column_name)
+        if mode == :full
+          full_update_condition(version_attribute)
+        elsif mode == :partial
+          raise "Column name must be provided" unless column_name
+          partial_update_condition(version_attribute, column_name)
+        end
+      end
+
+      def full_update_condition(attr_full)
+        attr_partial     = attr_full.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
+        attr_partial_max = "#{attr_partial}_max"
+
+        # Quote the column names
+        attr_full        = quote_column_name(attr_full)
+        attr_partial     = quote_column_name(attr_partial)
+        attr_partial_max = quote_column_name(attr_partial_max)
+
+        <<-SQL
+          , #{attr_partial} = '{}', #{attr_partial_max} = NULL
+
+          WHERE EXCLUDED.#{attr_full} IS NULL OR (
+            (#{table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} > #{table_name}.#{attr_full}) AND
+            (#{table_name}.#{attr_partial_max} IS NULL OR EXCLUDED.#{attr_full} >= #{table_name}.#{attr_partial_max})
+          )
+        SQL
+      end
+
+      def partial_update_condition(attr_full, column_name)
+        # column_name = quote_column_name(column_name)
+        attr_partial     = attr_full.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
+        attr_partial_max = "#{attr_partial}_max"
+        cast             = if attr_full == :resource_timestamp
+                             "timestamp"
+                           elsif attr_full == :resource_version
+                             "integer"
+                           end
+
+        # Quote the column names
+        attr_full        = quote_column_name(attr_full)
+        attr_partial     = quote_column_name(attr_partial)
+        attr_partial_max = quote_column_name(attr_partial_max)
+        # TODO(lsmola) how to quote 'column_name', seems like basic column quoting doesn't work on jsonb
+
+        <<-SQL
+          , #{attr_partial} = #{table_name}.#{attr_partial} || ('{"#{column_name}": "' || EXCLUDED.#{attr_partial_max}::#{cast} || '"}')::jsonb
+          , #{attr_partial_max} = greatest(#{table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
+          WHERE EXCLUDED.#{attr_partial_max} IS NULL OR (
+            (#{table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{table_name}.#{attr_full}) AND (
+              (#{table_name}.#{attr_partial}->>'#{column_name}')::#{cast} IS NULL OR
+              EXCLUDED.#{attr_partial_max}::#{cast} > (#{table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
+            )
+          )
+        SQL
+      end
+
+      def insert_query_returning
+        <<-SQL
+          RETURNING "id",#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}
+          #{insert_query_returning_timestamps}
+        SQL
+      end
+
+      def insert_query_returning_timestamps
+        if inventory_collection.parallel_safe?
+          # For upsert, we'll also return created and updated timestamps, so we can recognize what was created and what was
+          # updated
+          if inventory_collection.internal_timestamp_columns.present?
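+            # (a returned row whose created timestamp equals its updated timestamp can then be treated as a fresh
+            # insert rather than an update of an existing row)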
+            <<-SQL
+              , #{inventory_collection.internal_timestamp_columns.join(",")}
+            SQL
+          end
+        else
+          ""
+        end
+      end
+    end
+  end
+end

From 19eccb744597ff13e2978ddff5efe4a1280f48d1 Mon Sep 17 00:00:00 2001
From: Ladislav Smola
Date: Wed, 15 Aug 2018 18:16:11 +0200
Subject: [PATCH 24/28] Fix rubocop issues

Fix rubocop issues

---
 .../inventory_collection/index/type/skeletal.rb    | 2 +-
 .../save_collection/saver/concurrent_safe_batch.rb | 6 +++---
 .../save_collection/saver/sql_helper_upsert.rb     | 3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb
index 5b68bf7ad6a..933fc0a3acf 100644
--- a/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb
+++ b/app/models/manager_refresh/inventory_collection/index/type/skeletal.rb
@@ -37,7 +37,7 @@ def delete(index_value)
         index.delete(index_value)
       end

-      # Takes value from primary_index and puts it to skeletal index
+      # Takes value from primary_index and inserts it into the skeletal index
       #
       # @param index_value [String] an index_value of the InventoryObject we search for
       # @return [InventoryObject|nil] Returns found value or nil
diff --git a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb
index daabc5ffc61..9e984aebe03 100644
--- a/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb
+++ b/app/models/manager_refresh/save_collection/saver/concurrent_safe_batch.rb
@@ -336,7 +336,7 @@ def skeletonize_ignored_records!(hash, result, all_unique_columns: false)
       # saved are not being sent here. We have only rows that are new but become old as we send the query (so another
       # parallel process saved the data in the meantime), or rows where some attributes are newer than the whole row
       # being sent.
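      # Skeletonizing here keeps just the manager_uuid of such a row in the skeletal index, so a later
      # create_or_update_partial_records pass can retry it as a partial row.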
- hash.keys.each do |db_index| + hash.each_key do |db_index| inventory_collection.skeletal_primary_index.skeletonize_primary_index(hash[db_index].manager_uuid) end end @@ -429,8 +429,8 @@ def create_or_update_partial_records(all_attribute_keys) batch, :on_conflict => :do_update, :column_name => column_name) - result.each do |result| - results[result["id"]] = result + result.each do |res| + results[res["id"]] = res end end end diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb index 7631f07de59..345bfd8ceed 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb @@ -37,9 +37,10 @@ def build_insert_query(all_attribute_keys, hashes, on_conflict: nil, mode:, colu private def insert_query_insert_values(hashes, all_attribute_keys_array, connection) - values = hashes.map do |hash| + values = hashes.map do |hash| "(#{all_attribute_keys_array.map { |x| quote(connection, hash[x], x) }.join(",")})" end.join(",") + col_names = all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",") <<-SQL From 2ad8435aadc1b10eaabe461f0851176df4f147ea Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 16 Aug 2018 11:27:32 +0200 Subject: [PATCH 25/28] Inventory collection logging is bad Inventory collection logging is bad --- app/models/manager_refresh/inventory_collection.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/manager_refresh/inventory_collection.rb b/app/models/manager_refresh/inventory_collection.rb index e86b8576420..0b69d7b0ed4 100644 --- a/app/models/manager_refresh/inventory_collection.rb +++ b/app/models/manager_refresh/inventory_collection.rb @@ -657,7 +657,7 @@ def unique_indexes @unique_indexes_cache = model_class.connection.indexes(model_class.table_name).select(&:unique) if @unique_indexes_cache.blank? - raise "#{inventory_collection} and its table #{model_class.table_name} must have a unique index defined, to"\ + raise "#{self} and its table #{model_class.table_name} must have a unique index defined, to"\ " be able to use saver_strategy :concurrent_safe or :concurrent_safe_batch." end @@ -677,7 +677,7 @@ def unique_index_for(keys) uniq_key_candidates = unique_indexes.each_with_object([]) { |i, obj| obj << i if (keys - i.columns.map(&:to_sym)).empty? } if @unique_indexes_cache.blank? - raise "#{inventory_collection} and its table #{model_class.table_name} must have a unique index defined "\ + raise "#{self} and its table #{model_class.table_name} must have a unique index defined "\ "covering columns #{keys} to be able to use saver_strategy :concurrent_safe or :concurrent_safe_batch." 
      end

From 67b03eeb57e52baf9a2afff825bee01ceb326f7f Mon Sep 17 00:00:00 2001
From: Ladislav Smola
Date: Thu, 16 Aug 2018 11:27:56 +0200
Subject: [PATCH 26/28] Quote every column name plus cleanup of update query

Quote every column name plus cleanup of update query

---
 .../save_collection/saver/base.rb             |  4 +-
 .../saver/sql_helper_update.rb                | 60 +++++++++++--------
 .../saver/sql_helper_upsert.rb                | 22 +++----
 3 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/app/models/manager_refresh/save_collection/saver/base.rb b/app/models/manager_refresh/save_collection/saver/base.rb
index 74edc6c3fc9..a072c0139b7 100644
--- a/app/models/manager_refresh/save_collection/saver/base.rb
+++ b/app/models/manager_refresh/save_collection/saver/base.rb
@@ -14,6 +14,7 @@ def initialize(inventory_collection)
       # Private attrs
       @model_class = inventory_collection.model_class
       @table_name = @model_class.table_name
+      @q_table_name = get_connection.quote_table_name(@table_name)
       @primary_key = @model_class.primary_key
       @arel_primary_key = @model_class.arel_attribute(@primary_key)
       @unique_index_keys = inventory_collection.unique_index_keys
@@ -117,7 +118,8 @@ def transform_to_hash!(all_attribute_keys, hash)
     attr_reader :unique_index_keys, :unique_index_keys_to_s, :select_keys, :unique_db_primary_keys, :unique_db_indexes,
                 :primary_key, :arel_primary_key, :record_key_method, :pure_sql_records_fetching, :select_keys_indexes,
-                :batch_size, :batch_size_for_persisting, :model_class, :serializable_keys, :deserializable_keys, :pg_types, :table_name
+                :batch_size, :batch_size_for_persisting, :model_class, :serializable_keys, :deserializable_keys, :pg_types, :table_name,
+                :q_table_name

     # Saves the InventoryCollection
     #
diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb b/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb
index 3bbd51e887a..7362b60052d 100644
--- a/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb
+++ b/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb
@@ -22,10 +22,17 @@ def build_update_query(all_attribute_keys, hashes)
         all_attribute_keys_array = all_attribute_keys.to_a.delete_if { |x| %i(created_at created_on).include?(x) }
         all_attribute_keys_array << :id

+        # If there is no version attribute, the version conditions will be ignored
+        version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
+                              :resource_timestamp
+                            elsif supports_remote_data_version?(all_attribute_keys)
+                              :resource_version
+                            end
+
         update_query = update_query_beginning(all_attribute_keys_array)
-        update_query += update_query_reset_version_columns(all_attribute_keys)
+        update_query += update_query_reset_version_columns(version_attribute)
         update_query += update_query_from_values(hashes, all_attribute_keys_array, connection)
-        update_query += update_query_version_conditions(all_attribute_keys)
+        update_query += update_query_version_conditions(version_attribute)
         update_query += update_query_returning

         _log.debug("Building update query for #{inventory_collection} of size #{inventory_collection.size}...Complete")
@@ -43,15 +50,18 @@ def update_query_beginning(all_attribute_keys_array)
         SQL
       end

-      def update_query_reset_version_columns(all_attribute_keys)
-        if supports_remote_data_timestamp?(all_attribute_keys)
+      def update_query_reset_version_columns(version_attribute)
+        if version_attribute
+          attr_partial     = version_attribute.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
+          attr_partial_max =
"#{attr_partial}_max" + + # Quote the column names + attr_partial = quote_column_name(attr_partial) + attr_partial_max = quote_column_name(attr_partial_max) + # Full row update will reset the partial update timestamps <<-SQL - , resource_timestamps = '{}', resource_timestamps_max = NULL - SQL - elsif supports_remote_data_version?(all_attribute_keys) - <<-SQL - , resource_versions = '{}', resource_versions_max = NULL + , #{attr_partial} = '{}', #{attr_partial_max} = NULL SQL else "" @@ -68,28 +78,26 @@ def update_query_from_values(hashes, all_attribute_keys_array, connection) VALUES #{values} ) AS updated_values (#{all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",")}) - WHERE updated_values.id = #{table_name}.id + WHERE updated_values.id = #{q_table_name}.id SQL end - def update_query_version_conditions(all_attribute_keys) - # This conditional will avoid rewriting new data by old data. But we want it only when remote_data_timestamp is - # a part of the data, since for the fake records, we just want to update ems_ref. - if supports_remote_data_timestamp?(all_attribute_keys) - <<-SQL - AND ( - updated_values.resource_timestamp IS NULL OR ( - (#{table_name}.resource_timestamp IS NULL OR updated_values.resource_timestamp > #{table_name}.resource_timestamp) AND - (#{table_name}.resource_timestamps_max IS NULL OR updated_values.resource_timestamp >= #{table_name}.resource_timestamps_max) - ) - ) - SQL - elsif supports_remote_data_version?(all_attribute_keys) + def update_query_version_conditions(version_attribute) + if version_attribute + # This conditional will avoid rewriting new data by old data. But we want it only when version_attribute is + # a part of the data, since for the fake records, we just want to update ems_ref. + attr_partial = version_attribute.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps + attr_partial_max = "#{attr_partial}_max" + + # Quote the column names + attr_full = quote_column_name(version_attribute) + attr_partial_max = quote_column_name(attr_partial_max) + <<-SQL AND ( - updated_values.resource_version IS NULL OR ( - (#{table_name}.resource_version IS NULL OR updated_values.resource_version > #{table_name}.resource_version) AND - (#{table_name}.resource_versions_max IS NULL OR updated_values.resource_version >= #{table_name}.resource_versions_max) + updated_values.#{attr_full} IS NULL OR ( + (#{q_table_name}.#{attr_full} IS NULL OR updated_values.#{attr_full} > #{q_table_name}.#{attr_full}) AND + (#{q_table_name}.#{attr_partial_max} IS NULL OR updated_values.#{attr_full} >= #{q_table_name}.#{attr_partial_max}) ) ) SQL diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb index 345bfd8ceed..a40b251e139 100644 --- a/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb +++ b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb @@ -44,7 +44,7 @@ def insert_query_insert_values(hashes, all_attribute_keys_array, connection) col_names = all_attribute_keys_array.map { |x| quote_column_name(x) }.join(",") <<-SQL - INSERT INTO #{table_name} (#{col_names}) + INSERT INTO #{q_table_name} (#{col_names}) VALUES #{values} SQL @@ -124,14 +124,13 @@ def full_update_condition(attr_full) , #{attr_partial} = '{}', #{attr_partial_max} = NULL WHERE EXCLUDED.#{attr_full} IS NULL OR ( - (#{table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} > #{table_name}.#{attr_full}) AND - 
(#{table_name}.#{attr_partial_max} IS NULL OR EXCLUDED.#{attr_full} >= #{table_name}.#{attr_partial_max})
+            (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_full} > #{q_table_name}.#{attr_full}) AND
+            (#{q_table_name}.#{attr_partial_max} IS NULL OR EXCLUDED.#{attr_full} >= #{q_table_name}.#{attr_partial_max})
           )
         SQL
       end

       def partial_update_condition(attr_full, column_name)
-        # column_name = quote_column_name(column_name)
         attr_partial     = attr_full.to_s.pluralize # Changes resource_version/timestamp to resource_versions/timestamps
         attr_partial_max = "#{attr_partial}_max"
         cast             = if attr_full == :resource_timestamp
                              "timestamp"
                            elsif attr_full == :resource_version
                              "integer"
                            end

         # Quote the column names
         attr_full        = quote_column_name(attr_full)
         attr_partial     = quote_column_name(attr_partial)
         attr_partial_max = quote_column_name(attr_partial_max)
-        # TODO(lsmola) how to quote 'column_name', seems like basic column quoting doesn't work on jsonb
+        column_name      = get_connection.quote_string(column_name.to_s)
+        q_table_name     = get_connection.quote_table_name(table_name)

         <<-SQL
-          , #{attr_partial} = #{table_name}.#{attr_partial} || ('{"#{column_name}": "' || EXCLUDED.#{attr_partial_max}::#{cast} || '"}')::jsonb
-          , #{attr_partial_max} = greatest(#{table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
+          , #{attr_partial} = #{q_table_name}.#{attr_partial} || ('{"#{column_name}": "' || EXCLUDED.#{attr_partial_max}::#{cast} || '"}')::jsonb
+          , #{attr_partial_max} = greatest(#{q_table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
           WHERE EXCLUDED.#{attr_partial_max} IS NULL OR (
-            (#{table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{table_name}.#{attr_full}) AND (
-              (#{table_name}.#{attr_partial}->>'#{column_name}')::#{cast} IS NULL OR
-              EXCLUDED.#{attr_partial_max}::#{cast} > (#{table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
+            (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{q_table_name}.#{attr_full}) AND (
+              (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast} IS NULL OR
+              EXCLUDED.#{attr_partial_max}::#{cast} > (#{q_table_name}.#{attr_partial}->>'#{column_name}')::#{cast}
             )
           )
         SQL
       end
@@ -171,7 +171,7 @@ def insert_query_returning_timestamps
           # updated
           if inventory_collection.internal_timestamp_columns.present?
            <<-SQL
-          , #{inventory_collection.internal_timestamp_columns.join(",")}
+          , #{inventory_collection.internal_timestamp_columns.map { |x| quote_column_name(x) }.join(",")}
           SQL
         end
       else

From c36d4bd022032f8639e3a6b0d983ef813819b94f Mon Sep 17 00:00:00 2001
From: Ladislav Smola
Date: Thu, 16 Aug 2018 14:04:16 +0200
Subject: [PATCH 27/28] Store integer in resource_versions jsonb

Store integer in resource_versions jsonb

---
 .../save_collection/saver/sql_helper_upsert.rb | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb
index a40b251e139..f940219d1f6 100644
--- a/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb
+++ b/app/models/manager_refresh/save_collection/saver/sql_helper_upsert.rb
@@ -147,7 +147,7 @@ def partial_update_condition(attr_full, column_name)
         q_table_name     = get_connection.quote_table_name(table_name)

         <<-SQL
-          , #{attr_partial} = #{q_table_name}.#{attr_partial} || ('{"#{column_name}": "' || EXCLUDED.#{attr_partial_max}::#{cast} || '"}')::jsonb
+          #{insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)}
           , #{attr_partial_max} = greatest(#{q_table_name}.#{attr_partial_max}::#{cast}, EXCLUDED.#{attr_partial_max}::#{cast})
           WHERE EXCLUDED.#{attr_partial_max} IS NULL OR (
             (#{q_table_name}.#{attr_full} IS NULL OR EXCLUDED.#{attr_partial_max} > #{q_table_name}.#{attr_full}) AND (
@@ -158,6 +158,19 @@ def partial_update_condition(attr_full, column_name)
           SQL
         end

+      def insert_query_set_jsonb_version(cast, attr_partial, attr_partial_max, column_name)
+        if cast == "integer"
+          # If we have an integer value, we don't want to encapsulate the value in ""
+          <<-SQL
+            , #{attr_partial} = #{q_table_name}.#{attr_partial} || ('{"#{column_name}": ' || EXCLUDED.#{attr_partial_max}::#{cast} || '}')::jsonb
+          SQL
+        else
+          <<-SQL
+            , #{attr_partial} = #{q_table_name}.#{attr_partial} || ('{"#{column_name}": "' || EXCLUDED.#{attr_partial_max}::#{cast} || '"}')::jsonb
+          SQL
+        end
+      end
+
       def insert_query_returning
         <<-SQL
           RETURNING "id",#{unique_index_columns.map { |x| quote_column_name(x) }.join(",")}

From 4f3c6dccd29a51e31e101cf8172c945596b5e894 Mon Sep 17 00:00:00 2001
From: Ladislav Smola
Date: Wed, 22 Aug 2018 16:50:58 +0200
Subject: [PATCH 28/28] Add timestamp conditions only for parallel safe strategy

Add timestamp conditions only for parallel safe strategy

---
 .../save_collection/saver/sql_helper_update.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb b/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb
index 7362b60052d..9b62bec87da 100644
--- a/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb
+++ b/app/models/manager_refresh/save_collection/saver/sql_helper_update.rb
@@ -23,9 +23,9 @@ def build_update_query(all_attribute_keys, hashes)
         all_attribute_keys_array << :id

         # If there is no version attribute, the version conditions will be ignored
-        version_attribute = if supports_remote_data_timestamp?(all_attribute_keys)
+        version_attribute = if inventory_collection.parallel_safe? && supports_remote_data_timestamp?(all_attribute_keys)
                               :resource_timestamp
-                            elsif supports_remote_data_version?(all_attribute_keys)
+                            elsif inventory_collection.parallel_safe? && supports_remote_data_version?(all_attribute_keys)
                               :resource_version
                             end
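For illustration, a minimal sketch of the kind of statement the partial path now produces, roughly
build_insert_query(keys, hashes, :mode => :partial, :on_conflict => :do_update, :column_name => :phase)
with a :resource_timestamp version attribute. The container_groups table, its (ems_id, ems_ref)
unique index and the phase column are hypothetical, chosen only for the example, and the SET list
is shortened to a single column:

  INSERT INTO "container_groups" ("ems_id","ems_ref","phase","resource_timestamps_max")
    VALUES
      (1, 'pod-1', 'Running', '2018-08-22 10:00:00')
  ON CONFLICT ("ems_id","ems_ref")
    DO
      UPDATE
        SET "phase" = EXCLUDED."phase"
        , "resource_timestamps" = "container_groups"."resource_timestamps" || ('{"phase": "' || EXCLUDED."resource_timestamps_max"::timestamp || '"}')::jsonb
        , "resource_timestamps_max" = greatest("container_groups"."resource_timestamps_max"::timestamp, EXCLUDED."resource_timestamps_max"::timestamp)
        WHERE EXCLUDED."resource_timestamps_max" IS NULL OR (
          ("container_groups"."resource_timestamp" IS NULL OR EXCLUDED."resource_timestamps_max" > "container_groups"."resource_timestamp") AND (
            ("container_groups"."resource_timestamps"->>'phase')::timestamp IS NULL OR
            EXCLUDED."resource_timestamps_max"::timestamp > ("container_groups"."resource_timestamps"->>'phase')::timestamp
          )
        )
  RETURNING "id","ems_id","ems_ref"
  , "created_on","created_at","updated_on","updated_at";

With PATCH 27 applied, a :resource_version attribute would instead take the integer branch of
insert_query_set_jsonb_version, producing '{"phase": ' || EXCLUDED."resource_versions_max"::integer || '}'
so the jsonb holds a number rather than a quoted string. (The RETURNING list assumes
internal_timestamp_columns resolves to the created/updated columns shown.)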