From c6c3ad06af1d47b5ef456c83adc908a0ceac7ef9 Mon Sep 17 00:00:00 2001 From: Jessica Jenkins Date: Fri, 27 Aug 2021 12:30:55 +0100 Subject: [PATCH 1/5] Add empty default mappings to elasticsearch pkg --- .gitignore | 3 +++ elasticsearch/assets.go | 10 ++++++++++ elasticsearch/assets_test.go | 22 ++++++++++++++++++++++ elasticsearch/mappings.json | 2 ++ 4 files changed, 37 insertions(+) create mode 100644 elasticsearch/assets.go create mode 100644 elasticsearch/assets_test.go create mode 100644 elasticsearch/mappings.json diff --git a/.gitignore b/.gitignore index 09043126..bdadd962 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ #VSCode .vscode + +#Mac +.DS_Store diff --git a/elasticsearch/assets.go b/elasticsearch/assets.go new file mode 100644 index 00000000..4b6b07e3 --- /dev/null +++ b/elasticsearch/assets.go @@ -0,0 +1,10 @@ +package elasticsearch + +import _ "embed" + +//go:embed mappings.json +var mappingsJson []byte + +func GetDefaultMappings() []byte { + return mappingsJson +} diff --git a/elasticsearch/assets_test.go b/elasticsearch/assets_test.go new file mode 100644 index 00000000..30ed1738 --- /dev/null +++ b/elasticsearch/assets_test.go @@ -0,0 +1,22 @@ +package elasticsearch_test + +import ( + "encoding/json" + "github.com/ONSdigital/dp-search-api/elasticsearch" + . 
"github.com/smartystreets/goconvey/convey" + "testing" +) + +func TestGetDefaultMappings_ValidJson(t *testing.T) { + Convey("File `matchers.json` is valid jason", t, func() { + Convey("When we get the default mappings json", func() { + mappingsJson := elasticsearch.GetDefaultMappings() + + Convey("Then the json returned should be valid", func() { + + So(json.Valid(mappingsJson), ShouldBeTrue) + }) + + }) + }) +} diff --git a/elasticsearch/mappings.json b/elasticsearch/mappings.json new file mode 100644 index 00000000..2c63c085 --- /dev/null +++ b/elasticsearch/mappings.json @@ -0,0 +1,2 @@ +{ +} From aa3231a4b78778d46d65cc8bb76718cb9fae5ae5 Mon Sep 17 00:00:00 2001 From: Jessica Jenkins Date: Mon, 13 Sep 2021 14:20:49 +0100 Subject: [PATCH 2/5] Add default search index settings --- elasticsearch/assets.go | 8 +- elasticsearch/assets_test.go | 6 +- elasticsearch/mappings.json | 2 - elasticsearch/search-index-settings.json | 278 +++++++++++++++++++++++ 4 files changed, 285 insertions(+), 9 deletions(-) delete mode 100644 elasticsearch/mappings.json create mode 100644 elasticsearch/search-index-settings.json diff --git a/elasticsearch/assets.go b/elasticsearch/assets.go index 4b6b07e3..406db767 100644 --- a/elasticsearch/assets.go +++ b/elasticsearch/assets.go @@ -2,9 +2,9 @@ package elasticsearch import _ "embed" -//go:embed mappings.json -var mappingsJson []byte +//go:embed search-index-settings.json +var searchIndexSettingsJson []byte -func GetDefaultMappings() []byte { - return mappingsJson +func GetSearchIndexSettings() []byte { + return searchIndexSettingsJson } diff --git a/elasticsearch/assets_test.go b/elasticsearch/assets_test.go index 30ed1738..5cfdf786 100644 --- a/elasticsearch/assets_test.go +++ b/elasticsearch/assets_test.go @@ -8,9 +8,9 @@ import ( ) func TestGetDefaultMappings_ValidJson(t *testing.T) { - Convey("File `matchers.json` is valid jason", t, func() { - Convey("When we get the default mappings json", func() { - mappingsJson := 
elasticsearch.GetDefaultMappings() + Convey("File `search-index-settings.json` is valid JSON", t, func() { + Convey("When we get the default search index settings json", func() { + mappingsJson := elasticsearch.GetSearchIndexSettings() Convey("Then the json returned should be valid", func() { diff --git a/elasticsearch/mappings.json b/elasticsearch/mappings.json deleted file mode 100644 index 2c63c085..00000000 --- a/elasticsearch/mappings.json +++ /dev/null @@ -1,2 +0,0 @@ -{ -} diff --git a/elasticsearch/search-index-settings.json b/elasticsearch/search-index-settings.json new file mode 100644 index 00000000..ee73ee34 --- /dev/null +++ b/elasticsearch/search-index-settings.json @@ -0,0 +1,278 @@ +{ + "settings": { + "index": { + "number_of_shards": 3, + "number_of_replicas": 0 + }, + "analysis": { + "analyzer": { + "default_index": { + "tokenizer": "keyword", + "filter": [ + "trim", + "lowercase" + ] + }, + "ons_standard": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "standard", + "stop" + ] + }, + "ons_synonym_stem": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "ons_synonyms", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "ons_synonym": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "ons_synonyms", + "standard", + "stop" + ] + }, + "ons_stem": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "ons_synonym_stem_clear_dates": { + "tokenizer": "standard", + "char_filter": "clear_dates", + "filter": [ + "lowercase", + "ons_synonyms", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "ons_stem_clear_dates": { + "tokenizer": "standard", + "char_filter": "clear_dates", + "filter": [ + "lowercase", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "first_letter": { + "tokenizer": "keyword", + "filter": [ + "lowercase", + "first_letter" + ] + } + }, + "char_filter": { + "clear_dates": { + "type": 
"pattern_replace", + "pattern": "([1|2]\\d{3})|((?i)january|february|march|april|may|june|july|august|september|october|november|december)" + } + }, + "filter": { + "stem_exclusion": { + "type": "keyword_marker", + "keywords": [ + "productivity", + "production" + ] + }, + "first_letter": { + "type": "pattern_capture", + "preserve_original": false, + "patterns": [ + "^[^a-zA-Z]*([a-zA-Z]).*" + ] + }, + "ons_synonyms": { + "type": "synonym", + "synonyms": [ + "cpi, consumer price inflation, consumer price index", + "rpi, retail price index", + "gdp, gross domestic product", + "ashe, annual survey of hours and earnings", + "gva, gross value added", + "awe, average weekly earnings", + "lsoa, lower layer super output area", + "ppi, producer price inflation, producer price index", + "sic, standard industrial classification", + "ukea, uk economic accounts", + "neet, young people not in education", + "neet, employment or training", + "fdi, foreign direct investment", + "bop, balance of payments", + "sme, small medium enterprises", + "bres, business register and employment survey", + "gdhi, gross disposable household income", + "hpi, house price index", + "idbr, inter departmental business register", + "uk, united kingdom", + "copd, chronic obstructive pulmonary disease", + "lfs, labour force survey", + "imd, index of multiple deprivation", + "ccg, clinical commissioning group", + "abs, annual business survey", + "sppi, services producer price indices", + "hiv, human immunodeficiency virus", + "ips, international passenger survey", + "msoa, middle layer super output areas", + "aei, average earnings index", + "soc, standard occupational classification", + "jsa, jobseekers allowance", + "vat, value added tax", + "hmrc, hm revenue and customs published", + "ltim, long term international migration", + "ns sec, national statistics socio economic classification", + "nssec, national statistics socio economic classification", + "topsi, turnover and orders in production and services 
industries", + "r&d, research and development", + "berd, business enterprise research and development", + "iop, uk index of production", + "ios, index of services", + "rsi, retail sales index", + "ict, information and communication technology", + "gfcf, gross fixed capital formation", + "esa, european system of accounts", + "aps, annual population survey", + "eu, european union", + "m&a, mergers and acquisitions", + "itis, international trade in services", + "imr, infant mortality rate", + "tfr, total fertility rate", + "evo, estimates of the very old", + "asdr, age specific death rate", + "asmr, age standardised mortality rate or ratio", + "etb, etbhi, effects of taxes & benefits on household income", + "lcf, lcfs, living costs & food survey", + "eu-silc, eusilc, silc, eu-statistics on income & living condition", + "esspros, european system of social protections statistics", + "sdg, sustainable development goals", + "sdi, sustainable development indicators", + "pwb, personal well-being", + "ghg, greenhouse gas emissions", + "wfj, workforce jobs", + "was, wealth and assets survey", + "oa, output area", + "wz, workplace zone", + "npp, national population projections", + "snpp, subnational population projections", + "suid, sudden unexpected/unexplained infant deaths", + "drd, drug related deaths", + "c diff, clostridium difficile", + "eolc, end of life care", + "mb1, cancer registrations", + "imd, index of multiple deprivation", + "utla, upper tier local authority", + "sep, socioeconomic position", + "hi, health inequality", + "wellbeing => well being", + "psf => public sector finance" + ] + } + } + } + }, + "mappings": { + "dynamic_date_formats": [ + "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'" + ], + "properties": { + "type": { + "type": "keyword" + }, + "description": { + "properties": { + "cdid": { + "type": "text", + "analyzer": "ons_standard" + }, + "datasetId": { + "type": "text", + "analyzer": "ons_standard" + }, + "title": { + "type": "text", + "analyzer": 
"ons_synonym_stem", + "search_analyzer": "ons_stem", + "fields": { + "title_raw": { + "type": "keyword" + }, + "title_no_stem": { + "type": "text", + "analyzer": "ons_synonym", + "search_analyzer": "ons_standard" + }, + "title_no_synonym_no_stem": { + "type": "text", + "analyzer": "ons_standard" + }, + "title_no_dates": { + "type": "text", + "analyzer": "ons_synonym_stem_clear_dates", + "search_analyzer": "ons_stem_clear_dates" + }, + "title_first_letter": { + "type": "text", + "analyzer": "first_letter" + } + } + }, + "edition": { + "type": "text", + "analyzer": "ons_synonym_stem", + "search_analyzer": "ons_stem" + }, + "metaDescription": { + "type": "text", + "analyzer": "ons_standard" + }, + "summary": { + "type": "text", + "analyzer": "ons_standard" + }, + "keywords": { + "type": "text", + "analyzer": "ons_synonym_stem", + "search_analyzer": "ons_stem", + "fields": { + "keywords_raw": { + "type": "text" + } + } + }, + "releaseDate": { + "type": "date" + } + } + }, + "searchBoost": { + "type": "text", + "analyzer": "ons_synonym_stem", + "search_analyzer": "ons_stem", + "norms": { + "enabled": false + } + } + } + } +} From 9fd597a2ad58eb63c34e0dc4b1d861868586622c Mon Sep 17 00:00:00 2001 From: Jessica Jenkins Date: Mon, 13 Sep 2021 14:24:06 +0100 Subject: [PATCH 3/5] Upgrade go to 1.17.1 --- ci/build.yml | 2 +- ci/unit.yml | 2 +- go.mod | 15 ++++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/ci/build.yml b/ci/build.yml index 6cec6760..4d9c7c7a 100644 --- a/ci/build.yml +++ b/ci/build.yml @@ -6,7 +6,7 @@ image_resource: type: docker-image source: repository: golang - tag: 1.16.3 + tag: 1.17.1 inputs: - name: dp-search-api diff --git a/ci/unit.yml b/ci/unit.yml index 6da6e9cf..90543310 100644 --- a/ci/unit.yml +++ b/ci/unit.yml @@ -6,7 +6,7 @@ image_resource: type: docker-image source: repository: golang - tag: 1.16.3 + tag: 1.17.1 inputs: - name: dp-search-api diff --git a/go.mod b/go.mod index 5a6e7f73..d65777d4 100644 --- a/go.mod 
+++ b/go.mod @@ -1,6 +1,6 @@ module github.com/ONSdigital/dp-search-api -go 1.16 +go 1.17 require ( github.com/ONSdigital/dp-api-clients-go v1.34.4 // indirect @@ -19,3 +19,16 @@ require ( github.com/tdewolff/parse v2.3.4+incompatible // indirect github.com/tdewolff/test v1.0.6 // indirect ) + +require ( + github.com/fatih/color v1.9.0 // indirect + github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/jtolds/gls v4.20.0+incompatible // indirect + github.com/justinas/alice v1.2.0 // indirect + github.com/mattn/go-colorable v0.1.4 // indirect + github.com/mattn/go-isatty v0.0.11 // indirect + github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d // indirect + golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 // indirect + golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 // indirect +) From 20a3aeed497b20d934ea120b13cc9b4f7358eb4a Mon Sep 17 00:00:00 2001 From: Jessica Jenkins Date: Mon, 13 Sep 2021 14:38:15 +0100 Subject: [PATCH 4/5] Fix go.mod --- go.mod | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index d65777d4..b36e27fe 100644 --- a/go.mod +++ b/go.mod @@ -10,25 +10,22 @@ require ( github.com/ONSdigital/go-ns v0.0.0-20210410105122-6d6a140e952e github.com/ONSdigital/log.go v1.0.1 github.com/aws/aws-sdk-go v1.38.65 // indirect - github.com/gorilla/mux v1.8.0 - github.com/hokaccha/go-prettyjson v0.0.0-20210113012101-fb4e108d2519 // indirect - github.com/kelseyhightower/envconfig v1.4.0 - github.com/pkg/errors v0.9.1 - github.com/smartystreets/goconvey v1.6.4 - github.com/tdewolff/minify v2.3.6+incompatible - github.com/tdewolff/parse v2.3.4+incompatible // indirect - github.com/tdewolff/test v1.0.6 // indirect -) - -require ( github.com/fatih/color v1.9.0 // indirect github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 // indirect + github.com/gorilla/mux v1.8.0 + 
github.com/hokaccha/go-prettyjson v0.0.0-20210113012101-fb4e108d2519 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jtolds/gls v4.20.0+incompatible // indirect github.com/justinas/alice v1.2.0 // indirect + github.com/kelseyhightower/envconfig v1.4.0 github.com/mattn/go-colorable v0.1.4 // indirect github.com/mattn/go-isatty v0.0.11 // indirect + github.com/pkg/errors v0.9.1 github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d // indirect + github.com/smartystreets/goconvey v1.6.4 + github.com/tdewolff/minify v2.3.6+incompatible + github.com/tdewolff/parse v2.3.4+incompatible // indirect + github.com/tdewolff/test v1.0.6 // indirect golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 // indirect golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 // indirect ) From 84a5b50f323f8e4c5af39293d88ae8cb32be93b3 Mon Sep 17 00:00:00 2001 From: Jessica Jenkins Date: Fri, 17 Sep 2021 10:58:58 +0100 Subject: [PATCH 5/5] Update number of shards and replicas in ES index settings --- elasticsearch/search-index-settings.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elasticsearch/search-index-settings.json b/elasticsearch/search-index-settings.json index ee73ee34..11412625 100644 --- a/elasticsearch/search-index-settings.json +++ b/elasticsearch/search-index-settings.json @@ -1,8 +1,8 @@ { "settings": { "index": { - "number_of_shards": 3, - "number_of_replicas": 0 + "number_of_shards": 5, + "number_of_replicas": 1 }, "analysis": { "analyzer": {