diff --git a/.gitignore b/.gitignore index 09043126..bdadd962 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ #VSCode .vscode + +#Mac +.DS_Store diff --git a/ci/build.yml b/ci/build.yml index 6cec6760..4d9c7c7a 100644 --- a/ci/build.yml +++ b/ci/build.yml @@ -6,7 +6,7 @@ image_resource: type: docker-image source: repository: golang - tag: 1.16.3 + tag: 1.17.1 inputs: - name: dp-search-api diff --git a/ci/unit.yml b/ci/unit.yml index 6da6e9cf..90543310 100644 --- a/ci/unit.yml +++ b/ci/unit.yml @@ -6,7 +6,7 @@ image_resource: type: docker-image source: repository: golang - tag: 1.16.3 + tag: 1.17.1 inputs: - name: dp-search-api diff --git a/elasticsearch/assets.go b/elasticsearch/assets.go new file mode 100644 index 00000000..406db767 --- /dev/null +++ b/elasticsearch/assets.go @@ -0,0 +1,10 @@ +package elasticsearch + +import _ "embed" + +//go:embed search-index-settings.json +var searchIndexSettingsJson []byte + +func GetSearchIndexSettings() []byte { + return searchIndexSettingsJson +} diff --git a/elasticsearch/assets_test.go b/elasticsearch/assets_test.go new file mode 100644 index 00000000..5cfdf786 --- /dev/null +++ b/elasticsearch/assets_test.go @@ -0,0 +1,22 @@ +package elasticsearch_test + +import ( + "encoding/json" + "github.com/ONSdigital/dp-search-api/elasticsearch" + . 
"github.com/smartystreets/goconvey/convey" + "testing" +) + +func TestGetDefaultMappings_ValidJson(t *testing.T) { + Convey("File `search-index-settings.json` is valid json", t, func() { + Convey("When we get the default search index settings json", func() { + mappingsJson := elasticsearch.GetSearchIndexSettings() + + Convey("Then the json returned should be valid", func() { + + So(json.Valid(mappingsJson), ShouldBeTrue) + }) + + }) + }) +} diff --git a/elasticsearch/search-index-settings.json b/elasticsearch/search-index-settings.json new file mode 100644 index 00000000..11412625 --- /dev/null +++ b/elasticsearch/search-index-settings.json @@ -0,0 +1,278 @@ +{ + "settings": { + "index": { + "number_of_shards": 5, + "number_of_replicas": 1 + }, + "analysis": { + "analyzer": { + "default_index": { + "tokenizer": "keyword", + "filter": [ + "trim", + "lowercase" + ] + }, + "ons_standard": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "standard", + "stop" + ] + }, + "ons_synonym_stem": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "ons_synonyms", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "ons_synonym": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "ons_synonyms", + "standard", + "stop" + ] + }, + "ons_stem": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "ons_synonym_stem_clear_dates": { + "tokenizer": "standard", + "char_filter": "clear_dates", + "filter": [ + "lowercase", + "ons_synonyms", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "ons_stem_clear_dates": { + "tokenizer": "standard", + "char_filter": "clear_dates", + "filter": [ + "lowercase", + "standard", + "stop", + "stem_exclusion", + "snowball" + ] + }, + "first_letter": { + "tokenizer": "keyword", + "filter": [ + "lowercase", + "first_letter" + ] + } + }, + "char_filter": { + "clear_dates": { + "type": "pattern_replace", + "pattern": 
"([1|2]\\d{3})|((?i)january|february|march|april|may|june|july|august|september|october|november|december)" + } + }, + "filter": { + "stem_exclusion": { + "type": "keyword_marker", + "keywords": [ + "productivity", + "production" + ] + }, + "first_letter": { + "type": "pattern_capture", + "preserve_original": false, + "patterns": [ + "^[^a-zA-Z]*([a-zA-Z]).*" + ] + }, + "ons_synonyms": { + "type": "synonym", + "synonyms": [ + "cpi, consumer price inflation, consumer price index", + "rpi, retail price index", + "gdp, gross domestic product", + "ashe, annual survey of hours and earnings", + "gva, gross value added", + "awe, average weekly earnings", + "lsoa, lower layer super output area", + "ppi, producer price inflation, producer price index", + "sic, standard industrial classification", + "ukea, uk economic accounts", + "neet, young people not in education", + "neet, employment or training", + "fdi, foreign direct investment", + "bop, balance of payments", + "sme, small medium enterprises", + "bres, business register and employment survey", + "gdhi, gross disposable household income", + "hpi, house price index", + "idbr, inter departmental business register", + "uk, united kingdom", + "copd, chronic obstructive pulmonary disease", + "lfs, labour force survey", + "imd, index of multiple deprivation", + "ccg, clinical commissioning group", + "abs, annual business survey", + "sppi, services producer price indices", + "hiv, human immunodeficiency virus", + "ips, international passenger survey", + "msoa, middle layer super output areas", + "aei, average earnings index", + "soc, standard occupational classification", + "jsa, jobseekers allowance", + "vat, value added tax", + "hmrc, hm revenue and customs published", + "ltim, long term international migration", + "ns sec, national statistics socio economic classification", + "nssec, national statistics socio economic classification", + "topsi, turnover and orders in production and services industries", + "r&d, research 
and development", + "berd, business enterprise research and development", + "iop, uk index of production", + "ios, index of services", + "rsi, retail sales index", + "ict, information and communication technology", + "gfcf, gross fixed capital formation", + "esa, european system of accounts", + "aps, annual population survey", + "eu, european union", + "m&a, mergers and acquisitions", + "itis, international trade in services", + "imr, infant mortality rate", + "tfr, total fertility rate", + "evo, estimates of the very old", + "asdr, age specific death rate", + "asmr, age standardised mortality rate or ratio", + "etb, etbhi, effects of taxes & benefits on household income", + "lcf, lcfs, living costs & food survey", + "eu-silc, eusilc, silc, eu-statistics on income & living condition", + "esspros, european system of social protections statistics", + "sdg, sustainable development goals", + "sdi, sustainable development indicators", + "pwb, personal well-being", + "ghg, greenhouse gas emissions", + "wfj, workforce jobs", + "was, wealth and assets survey", + "oa, output area", + "wz, workplace zone", + "npp, national population projections", + "snpp, subnational population projections", + "suid, sudden unexpected/unexplained infant deaths", + "drd, drug related deaths", + "c diff, clostridium difficile", + "eolc, end of life care", + "mb1, cancer registrations", + "imd, index of multiple deprivation", + "utla, upper tier local authority", + "sep, socioeconomic position", + "hi, health inequality", + "wellbeing => well being", + "psf => public sector finance" + ] + } + } + } + }, + "mappings": { + "dynamic_date_formats": [ + "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'" + ], + "properties": { + "type": { + "type": "keyword" + }, + "description": { + "properties": { + "cdid": { + "type": "text", + "analyzer": "ons_standard" + }, + "datasetId": { + "type": "text", + "analyzer": "ons_standard" + }, + "title": { + "type": "text", + "analyzer": "ons_synonym_stem", + "search_analyzer": 
"ons_stem", + "fields": { + "title_raw": { + "type": "keyword" + }, + "title_no_stem": { + "type": "text", + "analyzer": "ons_synonym", + "search_analyzer": "ons_standard" + }, + "title_no_synonym_no_stem": { + "type": "text", + "analyzer": "ons_standard" + }, + "title_no_dates": { + "type": "text", + "analyzer": "ons_synonym_stem_clear_dates", + "search_analyzer": "ons_stem_clear_dates" + }, + "title_first_letter": { + "type": "text", + "analyzer": "first_letter" + } + } + }, + "edition": { + "type": "text", + "analyzer": "ons_synonym_stem", + "search_analyzer": "ons_stem" + }, + "metaDescription": { + "type": "text", + "analyzer": "ons_standard" + }, + "summary": { + "type": "text", + "analyzer": "ons_standard" + }, + "keywords": { + "type": "text", + "analyzer": "ons_synonym_stem", + "search_analyzer": "ons_stem", + "fields": { + "keywords_raw": { + "type": "text" + } + } + }, + "releaseDate": { + "type": "date" + } + } + }, + "searchBoost": { + "type": "text", + "analyzer": "ons_synonym_stem", + "search_analyzer": "ons_stem", + "norms": { + "enabled": false + } + } + } + } +} diff --git a/go.mod b/go.mod index 1b22b206..4d08fe8e 100644 --- a/go.mod +++ b/go.mod @@ -1,19 +1,32 @@ module github.com/ONSdigital/dp-search-api -go 1.16 +go 1.17 require ( + github.com/ONSdigital/dp-api-clients-go v1.41.1 // indirect github.com/ONSdigital/dp-elasticsearch/v2 v2.2.0 github.com/ONSdigital/dp-healthcheck v1.1.0 github.com/ONSdigital/dp-net v1.2.0 github.com/ONSdigital/go-ns v0.0.0-20210831102424-ebdecc20fe9e + github.com/ONSdigital/log.go v1.1.0 // indirect github.com/ONSdigital/log.go/v2 v2.0.9 github.com/aws/aws-sdk-go v1.38.65 // indirect + github.com/fatih/color v1.12.0 // indirect + github.com/gopherjs/gopherjs v0.0.0-20210202160940-bed99a852dfe // indirect github.com/gorilla/mux v1.8.0 + github.com/hokaccha/go-prettyjson v0.0.0-20210113012101-fb4e108d2519 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/jtolds/gls v4.20.0+incompatible 
// indirect + github.com/justinas/alice v1.2.0 // indirect github.com/kelseyhightower/envconfig v1.4.0 + github.com/mattn/go-colorable v0.1.8 // indirect + github.com/mattn/go-isatty v0.0.13 // indirect github.com/pkg/errors v0.9.1 + github.com/smartystreets/assertions v1.2.0 // indirect github.com/smartystreets/goconvey v1.6.4 github.com/tdewolff/minify v2.3.6+incompatible github.com/tdewolff/parse v2.3.4+incompatible // indirect github.com/tdewolff/test v1.0.6 // indirect + golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d // indirect + golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect )