Skip to content

Commit

Permalink
Rewrite indices query in the format migrator
Browse files Browse the repository at this point in the history
The format migrator prevents users from getting duplicate results when
formats are migrated into the "govuk" index from the "government" or
"detailed" ones: it ensures that only non-migrated formats are
returned from the "government"/"detailed" indices and only migrated
formats are returned from the "govuk" index.

Previously it did this with an `indices` query.  They look like
this:

    { indices:
      { indices: ["list", "of", "indices"]
      , query: { query to perform against docs from the indices }
      , no_match_query: { query to perform against other docs }
      }
    }

However, the `indices` query has been deprecated in ES5 and removed in
ES6, in favour of just searching over the `_index` field in docs
directly.

This PR changes the `indices` query into a `should` with two clauses
where one clause matches documents in the "govuk" index and the other
clause matches documents in the other indices.  It's a bit more
cumbersome because the `terms` query I'm using doesn't handle index
aliases[1], and involves another round-trip to elasticsearch when
constructing the query to find out the real name of the "govuk" alias.

Index real names only change very infrequently (whenever we reindex
the cluster), so in principle they could be cached.  But we don't
currently have a good way for the reindexing rake task to signal to
the web processes that they need to purge their cache.

[1] elastic/elasticsearch#23306
  • Loading branch information
barrucadu authored and Laurent Curau committed Dec 19, 2019
1 parent 7ed8dd9 commit 4c99a7b
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 52 deletions.
49 changes: 35 additions & 14 deletions lib/search/format_migrator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@ def initialize(base = nil)

def call
{
indices: {
indices: SearchConfig.instance.content_index_names,
query: excluding_formats,
no_match_query: only_formats
bool: {
minimum_should_match: 1,
should: [excluding_formats, only_formats]
}
}
end

def migrated_indices
SearchConfig.instance.new_content_index.real_index_names
end

def migrated_formats
GovukIndex::MigratedFormats.migrated_formats.keys
end
Expand All @@ -22,23 +25,41 @@ def migrated_formats

def excluding_formats
options = {}
options[:must] = @base if @base
options[:must_not] = { terms: { format: migrated_formats } } if migrated_formats.any?
{ bool: options.any? ? options : { must: { match_all: {} } } }
options[:must] = base

if migrated_formats.any?
options[:must_not] = [
{ terms: { _index: migrated_indices } },
{ terms: { format: migrated_formats } }
]
else
options[:must_not] = { terms: { _index: migrated_indices } }
end

{ bool: options }
end

def only_formats
return 'none' if migrated_formats.empty?
return { bool: { must_not: { match_all: {} } } } if migrated_formats.empty?

{
bool: {
must:
if @base
[@base, { terms: { format: migrated_formats } }]
else
{ terms: { format: migrated_formats } }
end
must: [
base,
{ terms: { _index: migrated_indices } },
{ terms: { format: migrated_formats } }
],
},
}
end

def base
# {} isn't legal in a must
if @base && @base != {}
@base
else
{ match_all: {} }
end
end
end
end
25 changes: 20 additions & 5 deletions spec/unit/search/aggregate_example_fetcher_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,20 @@ def query_for_example_global(field, value, return_fields)
bool: {
must: [
{ term: { field => value } },
{ indices: {
indices: SearchConfig.instance.content_index_names,
query: { bool: { must: { match_all: {} } } },
no_match_query: 'none'
} }
{
bool: {
minimum_should_match: 1,
should: [
{
bool: {
must: { match_all: {} },
must_not: { terms: { _index: %w(govuk_test) } }
}
},
{ bool: { must_not: { match_all: {} } } }
]
}
}
]
},
},
Expand Down Expand Up @@ -70,6 +79,12 @@ def stub_index(_name)
index
end

# rubocop:disable RSpec/AnyInstance
before do
allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test))
end
# rubocop:enable RSpec/AnyInstance

context "#prepare_response" do
it "map an empty response" do
fetcher = described_class.new(@index, {}, Search::QueryParameters.new, @builder)
Expand Down
107 changes: 74 additions & 33 deletions spec/unit/search/format_migrator_spec.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,31 @@
require 'spec_helper'

RSpec.describe Search::FormatMigrator do
# rubocop:disable RSpec/AnyInstance
before do
allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test))
end
# rubocop:enable RSpec/AnyInstance

it "when base query without migrated formats" do
allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return({})
base_query = { filter: 'component' }
expected = {
indices: {
indices: %w(government_test),
query: {
bool: { must: base_query }
},
no_match_query: 'none'
bool: {
minimum_should_match: 1,
should: [
{
bool: {
must: base_query,
must_not: { terms: { _index: %w(govuk_test) } }
}
},
{
bool: {
must_not: { match_all: {} }
}
}
]
}
}
expect(described_class.new(base_query).call).to eq(expected)
Expand All @@ -20,19 +35,28 @@
allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return('help_page' => :all)
base_query = { filter: 'component' }
expected = {
indices: {
indices: %w(government_test),
query: {
bool: {
must: base_query,
must_not: { terms: { format: %w[help_page] } },
bool: {
minimum_should_match: 1,
should: [
{
bool: {
must: base_query,
must_not: [
{ terms: { _index: %w(govuk_test) } },
{ terms: { format: %w(help_page) } }
]
}
},
{
bool: {
must: [
base_query,
{ terms: { _index: %w(govuk_test) } },
{ terms: { format: %w(help_page) } }
]
}
}
},
no_match_query: {
bool: {
must: [base_query, { terms: { format: %w[help_page] } }],
}
}
]
}
}
expect(described_class.new(base_query).call).to eq(expected)
Expand All @@ -41,10 +65,17 @@
it "when no base query without migrated formats" do
allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return({})
expected = {
indices: {
indices: %w(government_test),
query: { bool: { must: { match_all: {} } } },
no_match_query: 'none'
bool: {
minimum_should_match: 1,
should: [
{
bool: {
must: { match_all: {} },
must_not: { terms: { _index: %w(govuk_test) } }
}
},
{ bool: { must_not: { match_all: {} } } }
]
}
}
expect(described_class.new(nil).call).to eq(expected)
Expand All @@ -53,18 +84,28 @@
it "when no base query with migrated formats" do
allow(GovukIndex::MigratedFormats).to receive(:migrated_formats).and_return('help_page' => :all)
expected = {
indices: {
indices: %w(government_test),
query: {
bool: {
must_not: { terms: { format: %w[help_page] } },
}
},
no_match_query: {
bool: {
must: { terms: { format: %w[help_page] } },
bool:
{ minimum_should_match: 1,
should: [
{
bool: {
must: { match_all: {} },
must_not: [
{ terms: { _index: %w(govuk_test) } },
{ terms: { format: %w(help_page) } }
]
}
},
{
bool: {
must: [
{ match_all: {} },
{ terms: { _index: %w(govuk_test) } },
{ terms: { format: %w(help_page) } }
]
}
}
}
]
}
}
expect(described_class.new(nil).call).to eq(expected)
Expand Down
3 changes: 3 additions & 0 deletions spec/unit/search/query_builder_spec.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
require 'spec_helper'

RSpec.describe Search::QueryBuilder do
# rubocop:disable RSpec/AnyInstance
before do
allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test))
allow_any_instance_of(Search::BestBetsChecker).to receive(:best_bets).and_return([])
allow_any_instance_of(Search::BestBetsChecker).to receive(:worst_bets).and_return([])
end
# rubocop:enable RSpec/AnyInstance

context "with a simple search query" do
it "return a correct query object" do
Expand Down
6 changes: 6 additions & 0 deletions spec/unit/sitemap/sitemap_generator_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
require 'spec_helper'

RSpec.describe SitemapGenerator do
# rubocop:disable RSpec/AnyInstance
before do
allow_any_instance_of(LegacyClient::IndexForSearch).to receive(:real_index_names).and_return(%w(govuk_test))
end
# rubocop:enable RSpec/AnyInstance

it "should generate sitemap" do
sitemap = described_class.new(index_names: '')

Expand Down

0 comments on commit 4c99a7b

Please sign in to comment.