From 0041f6ef436441f7723751562d9293946e4d3aa0 Mon Sep 17 00:00:00 2001 From: Michael Walker Date: Tue, 4 Jun 2019 13:27:32 +0100 Subject: [PATCH] Rewrite `indices` query in the format migrator The format migrator prevents users from getting duplicate results when formats are migrated into the "govuk" index from the "government" or "detailed" ones: it ensures that only non-migrated formats are returned from the "government"/"detailed" indices and only migrated formats are returned from the "govuk" index. Previously it did this with an `indices` query. They look like this: { indices: { indices: ["list", "of", "indices"] , query: { query to perform against docs from the indices } , no_match_query: { query to perform against other docs } } } However, the `indices` query has been deprecated in ES5 and removed in ES6, in favour of just searching over the `_index` field in docs directly. This PR changes the `indices` query into a `should` with two clauses where one clause matches documents in the "govuk" index and the other clause matches documents in the other indices. It's a bit more cumbersome because the `terms` query I'm using doesn't handle index aliases[1], and involves another round-trip to elasticsearch when constructing the query to find out the real name of the "govuk" alias. Index real names only change very infrequently (whenever we reindex the cluster), so in principle they could be cached. But we don't currently have a good way for the reindexing rake task to signal to the web processes that they need to purge their cache. [1] https://github.com/elastic/elasticsearch/issues/23306 --- lib/search/format_migrator.rb | 49 +++++--- .../search/aggregate_example_fetcher_spec.rb | 25 +++- spec/unit/search/format_migrator_spec.rb | 107 ++++++++++++------ spec/unit/search/query_builder_spec.rb | 3 + spec/unit/sitemap/sitemap_generator_spec.rb | 6 + 5 files changed, 138 insertions(+), 52 deletions(-) diff --git a/lib/search/format_migrator.rb b/lib/search/format_migrator.rb index c2497b633..b0647b0f5 100644 --- a/lib/search/format_migrator.rb +++ b/lib/search/format_migrator.rb @@ -6,14 +6,17 @@ def initialize(base = nil) def call { - indices: { - indices: SearchConfig.instance.content_index_names, - query: excluding_formats, - no_match_query: only_formats + bool: { + minimum_should_match: 1, + should: [excluding_formats, only_formats] } } end + def migrated_indices + SearchConfig.instance.new_content_index.real_index_names + end + def migrated_formats GovukIndex::MigratedFormats.migrated_formats.keys end @@ -22,23 +25,41 @@ def migrated_formats def excluding_formats options = {} - options[:must] = @base if @base - options[:must_not] = { terms: { format: migrated_formats } } if migrated_formats.any? - { bool: options.any? ? options : { must: { match_all: {} } } } + options[:must] = base + + if migrated_formats.any? + options[:must_not] = [ + { terms: { _index: migrated_indices } }, + { terms: { format: migrated_formats } } + ] + else + options[:must_not] = { terms: { _index: migrated_indices } } + end + + { bool: options } end def only_formats - return 'none' if migrated_formats.empty? + return { bool: { must_not: { match_all: {} } } } if migrated_formats.empty? + { bool: { - must: - if @base - [@base, { terms: { format: migrated_formats } }] - else - { terms: { format: migrated_formats } } - end + must: [ + base, + { terms: { _index: migrated_indices } }, + { terms: { format: migrated_formats } } + ], }, } end + + def base + # {} isn't legal in a must + if @base && @base != {} + @base + else + { match_all: {} } + end + end end end diff --git a/spec/unit/search/aggregate_example_fetcher_spec.rb b/spec/unit/search/aggregate_example_fetcher_spec.rb index b6b5acd51..75ec701fc 100644 --- a/spec/unit/search/aggregate_example_fetcher_spec.rb +++ b/spec/unit/search/aggregate_example_fetcher_spec.rb @@ -12,11 +12,20 @@ def query_for_example_global(field, value, return_fields) bool: { must: [ { term: { field => value } }, - { indices: { - indices: SearchConfig.instance.content_index_names, - query: { bool: { must: { match_all: {} } } }, - no_match_query: 'none' - } } + { + bool: { + minimum_should_match: 1, + should: [ + { + bool: { + must: { match_all: {} }, + must_not: { terms: { _index: %w(govuk_test) } } + } + }, + { bool: { must_not: { match_all: {} } } } + ] + } + } ] }, }, @@ -70,6 +79,12 @@ def stub_index(_name) index end + # rubocop:disable RSpec/AnyInstance + before do + allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test)) + end + # rubocop:enable RSpec/AnyInstance + context "#prepare_response" do it "map an empty response" do fetcher = described_class.new(@index, {}, Search::QueryParameters.new, @builder) diff --git a/spec/unit/search/format_migrator_spec.rb b/spec/unit/search/format_migrator_spec.rb index 8ce959adb..48090527a 100644 --- a/spec/unit/search/format_migrator_spec.rb +++ b/spec/unit/search/format_migrator_spec.rb @@ -1,16 +1,31 @@ require 'spec_helper' RSpec.describe Search::FormatMigrator do + # rubocop:disable RSpec/AnyInstance + before do + allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test)) + end + # rubocop:enable RSpec/AnyInstance + it "when base query without migrated formats" do allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return({}) base_query = { filter: 'component' } expected = { - indices: { - indices: %w(government_test), - query: { - bool: { must: base_query } - }, - no_match_query: 'none' + bool: { + minimum_should_match: 1, + should: [ + { + bool: { + must: base_query, + must_not: { terms: { _index: %w(govuk_test) } } + } + }, + { + bool: { + must_not: { match_all: {} } + } + } + ] } } expect(described_class.new(base_query).call).to eq(expected) @@ -20,19 +35,28 @@ allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return('help_page' => :all) base_query = { filter: 'component' } expected = { - indices: { - indices: %w(government_test), - query: { - bool: { - must: base_query, - must_not: { terms: { format: %w[help_page] } }, + bool: { + minimum_should_match: 1, + should: [ + { + bool: { + must: base_query, + must_not: [ + { terms: { _index: %w(govuk_test) } }, + { terms: { format: %w(help_page) } } + ] + } + }, + { + bool: { + must: [ + base_query, + { terms: { _index: %w(govuk_test) } }, + { terms: { format: %w(help_page) } } + ] + } } - }, - no_match_query: { - bool: { - must: [base_query, { terms: { format: %w[help_page] } }], - } - } + ] } } expect(described_class.new(base_query).call).to eq(expected) @@ -41,10 +65,17 @@ it "when no base query without migrated formats" do allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return({}) expected = { - indices: { - indices: %w(government_test), - query: { bool: { must: { match_all: {} } } }, - no_match_query: 'none' + bool: { + minimum_should_match: 1, + should: [ + { + bool: { + must: { match_all: {} }, + must_not: { terms: { _index: %w(govuk_test) } } + } + }, + { bool: { must_not: { match_all: {} } } } + ] } } expect(described_class.new(nil).call).to eq(expected) @@ -53,18 +84,28 @@ it "when no base query with migrated formats" do allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return('help_page' => :all) expected = { - indices: { - indices: %w(government_test), - query: { - bool: { - must_not: { terms: { format: %w[help_page] } }, - } - }, - no_match_query: { - bool: { - must: { terms: { format: %w[help_page] } }, + bool: + { minimum_should_match: 1, + should: [ + { + bool: { + must: { match_all: {} }, + must_not: [ + { terms: { _index: %w(govuk_test) } }, + { terms: { format: %w(help_page) } } + ] + } + }, + { + bool: { + must: [ + { match_all: {} }, + { terms: { _index: %w(govuk_test) } }, + { terms: { format: %w(help_page) } } + ] + } } - } + ] } } expect(described_class.new(nil).call).to eq(expected) diff --git a/spec/unit/search/query_builder_spec.rb b/spec/unit/search/query_builder_spec.rb index 2f42ec3a5..c7e00aaa1 100644 --- a/spec/unit/search/query_builder_spec.rb +++ b/spec/unit/search/query_builder_spec.rb @@ -1,10 +1,13 @@ require 'spec_helper' RSpec.describe Search::QueryBuilder do + # rubocop:disable RSpec/AnyInstance before do + allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test)) allow_any_instance_of(Search::BestBetsChecker).to receive(:best_bets).and_return([]) allow_any_instance_of(Search::BestBetsChecker).to receive(:worst_bets).and_return([]) end + # rubocop:enable RSpec/AnyInstance context "with a simple search query" do it "return a correct query object" do diff --git a/spec/unit/sitemap/sitemap_generator_spec.rb b/spec/unit/sitemap/sitemap_generator_spec.rb index 46bb4e8db..f4a0406e4 100644 --- a/spec/unit/sitemap/sitemap_generator_spec.rb +++ b/spec/unit/sitemap/sitemap_generator_spec.rb @@ -1,6 +1,12 @@ require 'spec_helper' RSpec.describe SitemapGenerator do + # rubocop:disable RSpec/AnyInstance + before do + allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test)) + end + # rubocop:enable RSpec/AnyInstance + it "should generate sitemap" do sitemap = described_class.new(index_names: '')