From 3974045f208ecd40bf228793d1588e66e2d96e47 Mon Sep 17 00:00:00 2001 From: Julien Portalier Date: Thu, 16 Nov 2023 09:44:24 +0100 Subject: [PATCH] Fix: compatibility with elasticsearch 2 (#916) Stops using a number of elasticsearch 1 features that are deprecated or removed in elasticsearch 2: - Replaces the multi-field type for a string with a nested field (deprecated in elasticsearch 1). - Avoids the `.` in field definitions (invalid in elasticsearch 2). - Removes the path definition that isn't needed (deprecated in elasticsearch 2). - Renames `default_index` to `default` (deprecated in elasticsearch 1). - Uses `size` instead of the `limit` filter (deprecated in elasticsearch 1). - Uses aggregations instead of facet searches (deprecated in elasticsearch 2). These changes are compatible with elasticsearch v1.7 and v2.4, and the [elasticsearch migration plugin](https://github.com/elastic/elasticsearch-migration/tree/1.x) returns a full green status. To test against elasticsearch 2, you must tweak the docker compose service. For some reason the `index.max_result_window` setting must be increased from the default of 10000. 
For example: ```yaml services: elasticsearch: image: elasticsearch:2.4-alpine command: elasticsearch -Dindex.max_result_window=1000000 ``` --- app/models/elastic_search/query_helper.rb | 4 +- app/models/field/elasticsearch_concern.rb | 5 +- .../format_implementation.rb | 7 ++- .../field/identifier_fields/luhn_field.rb | 5 +- app/models/search.rb | 23 ++++--- app/models/search_base.rb | 62 +++++++++++-------- app/models/site/index_utils.rb | 2 +- .../models/search/fred_api_concern.rb | 4 +- 8 files changed, 63 insertions(+), 49 deletions(-) diff --git a/app/models/elastic_search/query_helper.rb b/app/models/elastic_search/query_helper.rb index 89a93a3f5..d87bfcf66 100644 --- a/app/models/elastic_search/query_helper.rb +++ b/app/models/elastic_search/query_helper.rb @@ -44,7 +44,7 @@ def full_text_search(text, search_base, collection, fields = nil) field = collection.fields.find { |x| x.code == key || x.name == key} next unless field - key = field.es_code + key = "properties.#{field.es_code}" op, value = SearchParser.get_op_and_val value # Check if the user is searching a label instead of the code @@ -83,7 +83,7 @@ def search_value_ids(text, collection, fields_to_search = nil) regex = /#{Regexp.escape text}/i fields_to_search.each do |field| option_id = search_value_id field, regex - codes[field.es_code] = option_id if option_id + codes["properties.#{field.es_code}"] = option_id if option_id end codes end diff --git a/app/models/field/elasticsearch_concern.rb b/app/models/field/elasticsearch_concern.rb index a09268f88..bd123d18f 100644 --- a/app/models/field/elasticsearch_concern.rb +++ b/app/models/field/elasticsearch_concern.rb @@ -15,10 +15,9 @@ def index_mapping { type: :date } when kind == 'text' { - type: :multi_field, + type: :string, index: :not_analyzed, fields: { - es_code => { type: :string, index: :not_analyzed }, - "#{es_code}.downcase" => { type: :string, path: :just_name, index: :analyzed, analyzer: :downcase }, + downcase: { type: :string, index: 
:analyzed, analyzer: :downcase }, }, } else diff --git a/app/models/field/identifier_fields/format_implementation.rb b/app/models/field/identifier_fields/format_implementation.rb index 1f6bb276a..cc292bec0 100644 --- a/app/models/field/identifier_fields/format_implementation.rb +++ b/app/models/field/identifier_fields/format_implementation.rb @@ -35,10 +35,13 @@ def initialize(field) end def valid_value?(value, existing_site_id) - if existing_values[value] + if existing = existing_values[value] # If the value already exists in the collection, the value will be invalid # Unless this is an update to an update an existing site with the same value - raise "The value already exists in the collection" unless (existing_site_id && (existing_values[value]["id"].to_s == existing_site_id.to_s)) end + unless existing_site_id && (existing["id"].to_s == existing_site_id.to_s) + raise "The value already exists in the collection" + end + end true end diff --git a/app/models/field/identifier_fields/luhn_field.rb b/app/models/field/identifier_fields/luhn_field.rb index 43ecb055e..52c1f7992 100644 --- a/app/models/field/identifier_fields/luhn_field.rb +++ b/app/models/field/identifier_fields/luhn_field.rb @@ -61,10 +61,9 @@ def compute_luhn_verifier(str) def largest_existing_luhn_value_in_this_field(collection) # Find largest existing value in ES - field_es_code = "properties.#{@field.es_code}" search = collection.new_search - search.field_exists(field_es_code) - search.sort field_es_code, false + search.field_exists(@field.es_code) + search.sort(@field.es_code, false) search.offset(0) search.limit(1) search.show_deleted diff --git a/app/models/search.rb b/app/models/search.rb index 916106573..7b6f3ca44 100644 --- a/app/models/search.rb +++ b/app/models/search.rb @@ -79,13 +79,7 @@ def sort(es_code, ascendent = true) when 'name' sort = 'name.downcase' else - es_code = remove_at_from_code es_code - field = fields.find { |x| x.code == es_code || x.es_code == es_code } - if field && 
field.kind == 'text' - sort = "#{field.es_code}.downcase" - else - sort = decode(es_code) - end + sort = sort_key(es_code) end ascendent = ascendent ? 'asc' : 'desc' @@ -95,6 +89,17 @@ def sort(es_code, ascendent = true) self end + protected def sort_key(es_code) + es_code = remove_at_from_code es_code + field = fields.find { |x| x.code == es_code || x.es_code == es_code } + + if field && field.kind == 'text' + query_key(field, downcase: true) + else + "properties.#{decode(es_code)}" + end + end + def sort_multiple(sort_list) sort_list.each_pair do |es_code, ascendent| sort(es_code, ascendent) @@ -206,8 +211,8 @@ def histogram_results(field_es_code) results = client.search index: @index_names, type: 'site', body: body histogram = {} - results["facets"]["field_#{field_es_code}_ratings"]["terms"].each do |item| - histogram[item["term"]] = item["count"] unless item["count"] == 0 + results["aggregations"]["field_#{field_es_code}_ratings"]["buckets"].each do |item| + histogram[item["key"]] = item["doc_count"] unless item["doc_count"] == 0 end histogram end diff --git a/app/models/search_base.rb b/app/models/search_base.rb index 3dd175195..fe73f9318 100644 --- a/app/models/search_base.rb +++ b/app/models/search_base.rb @@ -31,7 +31,7 @@ def uuid(uuid) def eq(field, value) if value.blank? - add_filter missing: {field: field.es_code} + add_filter missing: {field: query_key(field)} return self end @@ -48,27 +48,34 @@ def not_eq(field, value) end def query_params(field, value) - query_key = field.es_code validated_value = field.parse_for_query(value, @use_codes_instead_of_es_codes) if field.kind == 'date' - date_field_range(query_key, validated_value) + date_field_range(query_key(field), validated_value) elsif field.kind == 'yes_no' && !validated_value.is_a?(Array) && !Field.yes?(value) - { not: { :term => { query_key => true }}} # so we return false & nil values + { not: { :term => { query_key(field) => true }}} # so we return false & nil values elsif validated_value.is_a? 
Array - { terms: {query_key => validated_value} } + { terms: {query_key(field) => validated_value} } else - { term: {query_key => validated_value} } + { term: {query_key(field) => validated_value} } end # elsif field.select_kind? - # {term: {query_key => validated_value}} - # add_filter term: {query_key => validated_value} + # {term: {query_key(field) => validated_value}} + # add_filter term: {query_key(field) => validated_value} # else # end end + def query_key(field, downcase: false) + if downcase + "properties.#{field.es_code}.downcase" + else + "properties.#{field.es_code}" + end + end + def date_field_range(key, valid_value) date_from = valid_value[:date_from] date_to = valid_value[:date_to] @@ -78,20 +85,18 @@ def date_field_range(key, valid_value) def under(field, value) if value.blank? - add_filter missing: {field: field.es_code} + add_filter missing: {field: query_key(field)} return self end value = field.descendants_of_in_hierarchy value - query_key = field.es_code - add_filter terms: {query_key => value} + add_filter terms: {query_key(field) => value} self end def starts_with(field, value) validated_value = field.apply_format_query_validation(value, @use_codes_instead_of_es_codes) - query_key = field.es_code - add_prefix key: query_key, value: validated_value + add_prefix key: query_key(field), value: validated_value self end @@ -99,7 +104,7 @@ def starts_with(field, value) class_eval %Q( def #{op}(field, value) validated_value = field.apply_format_query_validation(value, @use_codes_instead_of_es_codes) - add_filter range: {field.es_code => {#{op}: validated_value}} + add_filter range: {query_key(field) => {#{op}: validated_value}} self end ) @@ -147,21 +152,24 @@ def where(properties = {}) # https://github.com/elasticsearch/elasticsearch/issues/1776 # The number I put here is the max integer in Java def histogram_search(field_es_code, filters=nil) - facets_hash = { + name = "field_#{field_es_code}_ratings" + + aggregation = { terms: { - field: 
field_es_code, + field: "properties.#{field_es_code}", size: 2147483647, all_terms: true, } } if filters.present? - query_params = query_params(filters.keys.first, filters.values.first) - query_hash = {facet_filter: {and: [query_params]} } - facets_hash.merge!(query_hash) + aggregation = { + filter: {and: [query_params(filters.keys.first, filters.values.first)]}, + aggs: { name => aggregation }, + } end - add_facet "field_#{field_es_code}_ratings", facets_hash + add_aggregation "field_#{field_es_code}_ratings", aggregation self end @@ -248,7 +256,7 @@ def radius(lat, lng, meters) end def field_exists(field_code) - add_filter exists: {field: field_code} + add_filter exists: {field: "properties.#{field_code}"} end def require_location @@ -266,7 +274,7 @@ def hierarchy(es_code, value) if value.present? eq field, value else - add_filter not: {exists: {field: es_code}} + add_filter not: {exists: {field: query_key(field)}} end end @@ -306,8 +314,8 @@ def get_body end end - if @facets - body[:facets] = @facets + if @aggregations + body[:aggs] = @aggregations end all_queries = [] @@ -343,9 +351,9 @@ def add_filter(filter) @filters.push filter end - def add_facet(name, value) - @facets ||= {} - @facets[name] = value + def add_aggregation(name, value) + @aggregations ||= {} + @aggregations[name] = value end private diff --git a/app/models/site/index_utils.rb b/app/models/site/index_utils.rb index b2a7b10aa..401d77370 100644 --- a/app/models/site/index_utils.rb +++ b/app/models/site/index_utils.rb @@ -7,7 +7,7 @@ module Site::IndexUtils index: { analysis: { analyzer: { - default_index: { + default: { tokenizer: :standard, filter: [:lowercase, :preserving_asciifolding], type: :custom diff --git a/plugins/fred_api/models/search/fred_api_concern.rb b/plugins/fred_api/models/search/fred_api_concern.rb index 4333a2ce5..87f00f4ac 100644 --- a/plugins/fred_api/models/search/fred_api_concern.rb +++ b/plugins/fred_api/models/search/fred_api_concern.rb @@ -51,9 +51,9 @@ def 
query_identifier(identifiers_proc, identifier_value) identifiers = identifiers_proc.call() if identifiers.empty? # there is no identifiers that satisfy the condition => the result should be an empty list - add_filter limit: {value: 0} + limit 0 else - terms = identifiers.map { |id_es_code| {:terms => { id_es_code => [identifier_value] }} } + terms = identifiers.map { |id_es_code| {:terms => { "properties.#{id_es_code}" => [identifier_value] }} } add_filter or: terms end self