Skip to content

Commit

Permalink
Fix: compatibility with elasticsearch 2 (#916)
Browse files Browse the repository at this point in the history
Fixes a number of deprecated features in elasticsearch 1 that got removed in elasticsearch 2:

- Replaces the `multi_field` type for strings with a `string` type that carries a nested `downcase` sub-field (`multi_field` was deprecated in elasticsearch 1).
- Avoids the `.` in field definitions (invalid in elasticsearch 2).
- Removes the path definition that isn't needed (deprecated in elasticsearch 2).
- Renames `default_index` as `default` (deprecated in elasticsearch 1).
- Uses size instead of the limit filter (deprecated in elasticsearch 1).
- Uses aggregations instead of facet searches (deprecated in elasticsearch 2).

These changes are compatible with elasticsearch v1.7 and v2.4, and the [elasticsearch migration plugin](https://github.com/elastic/elasticsearch-migration/tree/1.x) returns a full green status.

To test against elasticsearch 2, you must tweak the docker compose service. For some reason the `index.max_result_window` setting must be increased from the default of 10000. For example:

```yaml
services:
  elasticsearch:
    image: elasticsearch:2.4-alpine
    command: elasticsearch -Dindex.max_result_window=1000000
```
  • Loading branch information
ysbaddaden authored Nov 16, 2023
1 parent 0de95ab commit 3974045
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 49 deletions.
4 changes: 2 additions & 2 deletions app/models/elastic_search/query_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def full_text_search(text, search_base, collection, fields = nil)
field = collection.fields.find { |x| x.code == key || x.name == key}
next unless field

key = field.es_code
key = "properties.#{field.es_code}"
op, value = SearchParser.get_op_and_val value

# Check if the user is searching a label instead of the code
Expand Down Expand Up @@ -83,7 +83,7 @@ def search_value_ids(text, collection, fields_to_search = nil)
regex = /#{Regexp.escape text}/i
fields_to_search.each do |field|
option_id = search_value_id field, regex
codes[field.es_code] = option_id if option_id
codes["properties.#{field.es_code}"] = option_id if option_id
end
codes
end
Expand Down
5 changes: 2 additions & 3 deletions app/models/field/elasticsearch_concern.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ def index_mapping
{ type: :date }
when kind == 'text'
{
type: :multi_field,
type: :string, index: :not_analyzed,
fields: {
es_code => { type: :string, index: :not_analyzed },
"#{es_code}.downcase" => { type: :string, path: :just_name, index: :analyzed, analyzer: :downcase },
downcase: { type: :string, index: :analyzed, analyzer: :downcase },
},
}
else
Expand Down
7 changes: 5 additions & 2 deletions app/models/field/identifier_fields/format_implementation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,13 @@ def initialize(field)
end

def valid_value?(value, existing_site_id)
if existing_values[value]
if existing = existing_values[value]
# If the value already exists in the collection, the value will be invalid
# Unless this is an update to an existing site with the same value
raise "The value already exists in the collection" unless (existing_site_id && (existing_values[value]["id"].to_s == existing_site_id.to_s)) end
unless existing_site_id && (existing["id"].to_s == existing_site_id.to_s)
raise "The value already exists in the collection"
end
end
true
end

Expand Down
5 changes: 2 additions & 3 deletions app/models/field/identifier_fields/luhn_field.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,9 @@ def compute_luhn_verifier(str)

def largest_existing_luhn_value_in_this_field(collection)
# Find largest existing value in ES
field_es_code = "properties.#{@field.es_code}"
search = collection.new_search
search.field_exists(field_es_code)
search.sort field_es_code, false
search.field_exists(@field.es_code)
search.sort(@field.es_code, false)
search.offset(0)
search.limit(1)
search.show_deleted
Expand Down
23 changes: 14 additions & 9 deletions app/models/search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,7 @@ def sort(es_code, ascendent = true)
when 'name'
sort = 'name.downcase'
else
es_code = remove_at_from_code es_code
field = fields.find { |x| x.code == es_code || x.es_code == es_code }
if field && field.kind == 'text'
sort = "#{field.es_code}.downcase"
else
sort = decode(es_code)
end
sort = sort_key(es_code)
end
ascendent = ascendent ? 'asc' : 'desc'

Expand All @@ -95,6 +89,17 @@ def sort(es_code, ascendent = true)
self
end

# Resolves the Elasticsearch key to sort on for the given field code.
#
# The code is first passed through remove_at_from_code, then matched against
# the known fields by either their code or their es_code. Text fields sort on
# their "downcase" sub-field (via query_key); anything else sorts on the
# decoded code under the "properties." prefix.
protected def sort_key(es_code)
  code = remove_at_from_code(es_code)
  matching_field = fields.find do |candidate|
    candidate.code == code || candidate.es_code == code
  end

  # Text fields have a dedicated sub-field to sort on.
  if matching_field && matching_field.kind == 'text'
    return query_key(matching_field, downcase: true)
  end

  "properties.#{decode(code)}"
end

def sort_multiple(sort_list)
sort_list.each_pair do |es_code, ascendent|
sort(es_code, ascendent)
Expand Down Expand Up @@ -206,8 +211,8 @@ def histogram_results(field_es_code)
results = client.search index: @index_names, type: 'site', body: body

histogram = {}
results["facets"]["field_#{field_es_code}_ratings"]["terms"].each do |item|
histogram[item["term"]] = item["count"] unless item["count"] == 0
results["aggregations"]["field_#{field_es_code}_ratings"]["buckets"].each do |item|
histogram[item["key"]] = item["doc_count"] unless item["doc_count"] == 0
end
histogram
end
Expand Down
62 changes: 35 additions & 27 deletions app/models/search_base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def uuid(uuid)

def eq(field, value)
if value.blank?
add_filter missing: {field: field.es_code}
add_filter missing: {field: query_key(field)}
return self
end

Expand All @@ -48,27 +48,34 @@ def not_eq(field, value)
end

def query_params(field, value)
query_key = field.es_code
validated_value = field.parse_for_query(value, @use_codes_instead_of_es_codes)

if field.kind == 'date'
date_field_range(query_key, validated_value)
date_field_range(query_key(field), validated_value)
elsif field.kind == 'yes_no' && !validated_value.is_a?(Array) && !Field.yes?(value)
{ not: { :term => { query_key => true }}} # so we return false & nil values
{ not: { :term => { query_key(field) => true }}} # so we return false & nil values
elsif validated_value.is_a? Array
{ terms: {query_key => validated_value} }
{ terms: {query_key(field) => validated_value} }
else
{ term: {query_key => validated_value} }
{ term: {query_key(field) => validated_value} }
end

# elsif field.select_kind?
# {term: {query_key => validated_value}}
# add_filter term: {query_key => validated_value}
# {term: {query_key(field) => validated_value}}
# add_filter term: {query_key(field) => validated_value}
# else
# end

end

# Builds the key used to address a field in Elasticsearch queries.
#
# field    - any object responding to es_code.
# downcase - when truthy, targets the field's "downcase" sub-field.
#
# Returns "properties.<es_code>", with ".downcase" appended when requested.
def query_key(field, downcase: false)
  base_path = "properties.#{field.es_code}"
  downcase ? "#{base_path}.downcase" : base_path
end

def date_field_range(key, valid_value)
date_from = valid_value[:date_from]
date_to = valid_value[:date_to]
Expand All @@ -78,28 +85,26 @@ def date_field_range(key, valid_value)

def under(field, value)
if value.blank?
add_filter missing: {field: field.es_code}
add_filter missing: {field: query_key(field)}
return self
end

value = field.descendants_of_in_hierarchy value
query_key = field.es_code
add_filter terms: {query_key => value}
add_filter terms: {query_key(field) => value}
self
end

def starts_with(field, value)
validated_value = field.apply_format_query_validation(value, @use_codes_instead_of_es_codes)
query_key = field.es_code
add_prefix key: query_key, value: validated_value
add_prefix key: query_key(field), value: validated_value
self
end

['lt', 'lte', 'gt', 'gte'].each do |op|
class_eval %Q(
def #{op}(field, value)
validated_value = field.apply_format_query_validation(value, @use_codes_instead_of_es_codes)
add_filter range: {field.es_code => {#{op}: validated_value}}
add_filter range: {query_key(field) => {#{op}: validated_value}}
self
end
)
Expand Down Expand Up @@ -147,21 +152,24 @@ def where(properties = {})
# https://github.com/elasticsearch/elasticsearch/issues/1776
# The number I put here is the max integer in Java
def histogram_search(field_es_code, filters=nil)
facets_hash = {
name = "field_#{field_es_code}_ratings"

aggregation = {
terms: {
field: field_es_code,
field: "properties.#{field_es_code}",
size: 2147483647,
all_terms: true,
}
}

if filters.present?
query_params = query_params(filters.keys.first, filters.values.first)
query_hash = {facet_filter: {and: [query_params]} }
facets_hash.merge!(query_hash)
aggregation = {
filter: {and: [query_params(filters.keys.first, filters.values.first)]},
aggs: { name => aggregation },
}
end

add_facet "field_#{field_es_code}_ratings", facets_hash
add_aggregation "field_#{field_es_code}_ratings", aggregation

self
end
Expand Down Expand Up @@ -248,7 +256,7 @@ def radius(lat, lng, meters)
end

def field_exists(field_code)
add_filter exists: {field: field_code}
add_filter exists: {field: "properties.#{field_code}"}
end

def require_location
Expand All @@ -266,7 +274,7 @@ def hierarchy(es_code, value)
if value.present?
eq field, value
else
add_filter not: {exists: {field: es_code}}
add_filter not: {exists: {field: query_key(field)}}
end
end

Expand Down Expand Up @@ -306,8 +314,8 @@ def get_body
end
end

if @facets
body[:facets] = @facets
if @aggregations
body[:aggs] = @aggregations
end

all_queries = []
Expand Down Expand Up @@ -343,9 +351,9 @@ def add_filter(filter)
@filters.push filter
end

def add_facet(name, value)
@facets ||= {}
@facets[name] = value
def add_aggregation(name, value)
@aggregations ||= {}
@aggregations[name] = value
end

private
Expand Down
2 changes: 1 addition & 1 deletion app/models/site/index_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module Site::IndexUtils
index: {
analysis: {
analyzer: {
default_index: {
default: {
tokenizer: :standard,
filter: [:lowercase, :preserving_asciifolding],
type: :custom
Expand Down
4 changes: 2 additions & 2 deletions plugins/fred_api/models/search/fred_api_concern.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ def query_identifier(identifiers_proc, identifier_value)
identifiers = identifiers_proc.call()
if identifiers.empty?
# there are no identifiers that satisfy the condition => the result should be an empty list
add_filter limit: {value: 0}
limit 0
else
terms = identifiers.map { |id_es_code| {:terms => { id_es_code => [identifier_value] }} }
terms = identifiers.map { |id_es_code| {:terms => { "properties.#{id_es_code}" => [identifier_value] }} }
add_filter or: terms
end
self
Expand Down

0 comments on commit 3974045

Please sign in to comment.