diff --git a/.travis.yml b/.travis.yml index 30123a5..a7c3588 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,25 @@ language: go addons: - postgresql: '9.6' + postgresql: '10' + apt: + packages: + - postgresql-10 + - postgresql-client-10 go: -- "1.10" +- "1.11" + +env: + global: + - GO111MODULE=on before_install: - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-6.2.3.deb && sudo dpkg -i --force-confnew elasticsearch-6.2.3.deb - sudo service elasticsearch start + - sudo sed -i -e '/local.*peer/s/postgres/all/' -e 's/peer\|md5/trust/g' /etc/postgresql/*/main/pg_hba.conf + - sudo sed -i 's/port = 5433/port = 5432/' /etc/postgresql/10/main/postgresql.conf + - sudo service postgresql restart 10 - sleep 10 before_script: diff --git a/CHANGELOG.md b/CHANGELOG.md index 25bac76..09c2460 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,19 @@ +v2.0.0 +---------- + * Ignore value of is_test on contacts + +v1.0.27 +---------- + * update ES shards to match current ES best-practice guidance + +v1.0.26 +---------- + * move to go module, don't ignore any keywords + +v1.0.25 +---------- + * Changes to support both PG 10 and 9.6 + v1.0.24 ---------- * increase batch size to 500k @@ -28,7 +44,6 @@ v1.0.18 v1.0.17 ---------- - * change to number instead of decimal field * add example not exists query @@ -51,7 +66,7 @@ v1.0.13 v1.0.12 ---------- * add modified_on_mu for sorting / index creation - * add prefix name for index building + * add prefix name for index building v1.0.11 ---------- @@ -99,4 +114,3 @@ v1.0.2 v1.0.1 ---------- * Add changelog, move to fancy revving - diff --git a/Gopkg.lock b/Gopkg.lock deleted file mode 100644 index 39cde3f..0000000 --- a/Gopkg.lock +++ /dev/null @@ -1,129 +0,0 @@ -# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
- - -[[projects]] - name = "github.com/certifi/gocertifi" - packages = ["."] - revision = "deb3ae2ef2610fde3330947281941c562861188b" - version = "2018.01.18" - -[[projects]] - name = "github.com/davecgh/go-spew" - packages = ["spew"] - revision = "346938d642f2ec3594ed81d874461961cd0faa76" - version = "v1.1.0" - -[[projects]] - name = "github.com/evalphobia/logrus_sentry" - packages = ["."] - revision = "57846a82817615f185b10cc40de8dc60c1ca5ae1" - version = "v0.4.5" - -[[projects]] - name = "github.com/fatih/structs" - packages = ["."] - revision = "a720dfa8df582c51dee1b36feabb906bde1588bd" - version = "v1.0" - -[[projects]] - branch = "master" - name = "github.com/getsentry/raven-go" - packages = ["."] - revision = "d1470f50d3a3a38533355a9fab7393e25a3b4226" - -[[projects]] - branch = "master" - name = "github.com/lib/pq" - packages = [ - ".", - "oid" - ] - revision = "d34b9ff171c21ad295489235aec8b6626023cd04" - -[[projects]] - branch = "master" - name = "github.com/mailru/easyjson" - packages = [ - ".", - "buffer", - "jlexer", - "jwriter" - ] - revision = "8b799c424f57fa123fc63a99d6383bc6e4c02578" - -[[projects]] - name = "github.com/naoina/go-stringutil" - packages = ["."] - revision = "6b638e95a32d0c1131db0e7fe83775cbea4a0d0b" - version = "v0.1.0" - -[[projects]] - name = "github.com/naoina/toml" - packages = [ - ".", - "ast" - ] - revision = "e6f5723bf2a66af014955e0888881314cf294129" - version = "v0.1.1" - -[[projects]] - name = "github.com/nyaruka/ezconf" - packages = ["."] - revision = "4189460bbdbcd38bb447e77ab6421747ce65b1b4" - version = "v0.2.1" - -[[projects]] - name = "github.com/olivere/elastic" - packages = [ - ".", - "config", - "uritemplates" - ] - revision = "d6362604399c7af560b54f048b4fcfbdd6eff293" - version = "v6.1.14" - -[[projects]] - name = "github.com/pkg/errors" - packages = ["."] - revision = "645ef00459ed84a119197bfb8d8205042c6df63d" - version = "v0.8.0" - -[[projects]] - name = "github.com/pmezard/go-difflib" - packages = ["difflib"] - 
revision = "792786c7400a136282c1664665ae0a8db921c6c2" - version = "v1.0.0" - -[[projects]] - name = "github.com/sirupsen/logrus" - packages = ["."] - revision = "c155da19408a8799da419ed3eeb0cb5db0ad5dbc" - version = "v1.0.5" - -[[projects]] - name = "github.com/stretchr/testify" - packages = ["assert"] - revision = "12b6f73e6084dad08a7c6e575284b177ecafbc71" - version = "v1.2.1" - -[[projects]] - branch = "master" - name = "golang.org/x/crypto" - packages = ["ssh/terminal"] - revision = "88942b9c40a4c9d203b82b3731787b672d6e809b" - -[[projects]] - branch = "master" - name = "golang.org/x/sys" - packages = [ - "unix", - "windows" - ] - revision = "13d03a9a82fba647c21a0ef8fba44a795d0f0835" - -[solve-meta] - analyzer-name = "dep" - analyzer-version = 1 - inputs-digest = "6f385dbd2b09601ea268378bfb9d683a46e71c875d1dbdff49017b511923e4f6" - solver-name = "gps-cdcl" - solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml deleted file mode 100644 index f277bb8..0000000 --- a/Gopkg.toml +++ /dev/null @@ -1,50 +0,0 @@ -# Gopkg.toml example -# -# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md -# for detailed Gopkg.toml documentation. 
-# -# required = ["github.com/user/thing/cmd/thing"] -# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] -# -# [[constraint]] -# name = "github.com/user/project" -# version = "1.0.0" -# -# [[constraint]] -# name = "github.com/user/project2" -# branch = "dev" -# source = "github.com/myfork/project2" -# -# [[override]] -# name = "github.com/x/y" -# version = "2.4.0" -# -# [prune] -# non-go = false -# go-tests = true -# unused-packages = true - - -[[constraint]] - branch = "master" - name = "github.com/lib/pq" - -[[constraint]] - name = "github.com/nyaruka/ezconf" - version = "0.2.1" - -[[constraint]] - name = "github.com/sirupsen/logrus" - version = "1.0.5" - -[prune] - go-tests = true - unused-packages = true - -[[constraint]] - name = "github.com/stretchr/testify" - version = "1.2.1" - -[[constraint]] - name = "github.com/olivere/elastic" - version = "6.1.14" diff --git a/README.md b/README.md index 96dd4b9..9b8816c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,14 @@ Simple service for indexing RapidPro contacts into ElasticSearch. -This service can run in two modes: +# Deploying + +As Indexer is a go application, it compiles to a binary and that binary along with the config file is all +you need to run it on your server. You can find bundles for each platform in the +[releases directory](https://github.com/nyaruka/rp-indexer/releases). You should only run a single indexer +instance for a deployment. + +Indexer can run in two modes: 1) the default mode, which simply queries the ElasticSearch database, finds the most recently modified contact, then on a schedule queries the `contacts_contact` table on the RapidPro @@ -14,36 +21,90 @@ all contacts on RapidPro. Once complete, this switches out the alias for the con with the newly build index. This can be run on a cron (in parallel with the mode above) to rebuild your index occasionally to get rid of bloat. 
-## Usage +# Configuration + +Indexer uses a tiered configuration system; each option takes precedence over the ones above it: + 1. The configuration file + 2. Environment variables starting with `INDEXER_` + 3. Command line parameters + +We recommend running Indexer with no changes to the configuration and no parameters, using only +environment variables to configure it. You can use `% rp-indexer --help` to see a list of the +environment variables and parameters and for more details on each option. -It is recommended to run the service with two environment variables set: +## RapidPro Configuration + +For use with RapidPro, you will want to configure these settings: * `INDEXER_DB`: a URL connection string for your RapidPro database * `INDEXER_ELASTIC_URL`: the URL for your ElasticSearch endpoint + +Recommended settings for error reporting: + + * `INDEXER_SENTRY_DSN`: The DSN to use when logging errors to Sentry + +# Development + +Install Indexer source in your workspace with: + +``` +go get github.com/nyaruka/rp-indexer +``` + +Build Indexer with: + +``` +go build github.com/nyaruka/rp-indexer/cmd/rp-indexer +``` + +This will create a new executable in your current directory `rp-indexer` + +To run the tests you need to create the test database: + +``` +$ createdb elastic_test +``` + +To run all of the tests: + +``` +go test github.com/nyaruka/rp-indexer/...
-p=1 +``` + +# Usage ``` Indexes RapidPro contacts to ElasticSearch Usage of indexer: + -cleanup + whether to remove old indexes after a rebuild -db string - the connection string for our database (default "postgres://localhost/rapidpro") + the connection string for our database (default "postgres://localhost/rapidpro?sslmode=disable") -debug-conf - print where config values are coming from + print where config values are coming from -elastic-url string - the url for our elastic search instance (default "http://localhost:9200") + the url for our elastic search instance (default "http://localhost:9200") -help - print usage information + print usage information -index string - the alias for our contact index (default "contacts") + the alias for our contact index (default "contacts") + -log-level string + the log level, one of error, warn, info, debug (default "info") -poll int - the number of seconds to wait between checking for updated contacts (default 5) + the number of seconds to wait between checking for updated contacts (default 5) -rebuild - whether to rebuild the index, swapping it when complete, then exiting (default false) + whether to rebuild the index, swapping it when complete, then exiting (default false) + -sentry-dsn string + the sentry configuration to log errors to, if any Environment variables: + INDEXER_CLEANUP - bool INDEXER_DB - string INDEXER_ELASTIC_URL - string INDEXER_INDEX - string + INDEXER_LOG_LEVEL - string INDEXER_POLL - int INDEXER_REBUILD - bool -``` + INDEXER_SENTRY_DSN - string + ``` diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..724f3f5 --- /dev/null +++ b/go.mod @@ -0,0 +1,24 @@ +module github.com/nyaruka/rp-indexer + +require ( + github.com/certifi/gocertifi v0.0.0-20180118203423-deb3ae2ef261 + github.com/davecgh/go-spew v1.1.0 + github.com/evalphobia/logrus_sentry v0.4.5 + github.com/fatih/structs v1.0.0 + github.com/fortytw2/leaktest v1.3.0 // indirect + github.com/getsentry/raven-go 
v0.0.0-20180405121644-d1470f50d3a3 + github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 + github.com/mailru/easyjson v0.0.0-20180323154445-8b799c424f57 + github.com/naoina/go-stringutil v0.1.0 + github.com/naoina/toml v0.1.1 + github.com/nyaruka/ezconf v0.2.1 + github.com/olivere/elastic v6.1.14+incompatible + github.com/pkg/errors v0.8.0 + github.com/pmezard/go-difflib v1.0.0 + github.com/sirupsen/logrus v1.0.5 + github.com/stretchr/testify v1.2.1 + golang.org/x/crypto v0.0.0-20180322175230-88942b9c40a4 + golang.org/x/sys v0.0.0-20180326154331-13d03a9a82fb + gopkg.in/airbrake/gobrake.v2 v2.0.9 // indirect + gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..2cb8612 --- /dev/null +++ b/go.sum @@ -0,0 +1,40 @@ +github.com/certifi/gocertifi v0.0.0-20180118203423-deb3ae2ef261 h1:6/yVvBsKeAw05IUj4AzvrxaCnDjN4nUqKjW9+w5wixg= +github.com/certifi/gocertifi v0.0.0-20180118203423-deb3ae2ef261/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/evalphobia/logrus_sentry v0.4.5 h1:weRoBjojMYPp57TLDjPEkP58JVHHSiqNrxG+h3ODdPM= +github.com/evalphobia/logrus_sentry v0.4.5/go.mod h1:pKcp+vriitUqu9KiWj/VRFbRfFNUwz95/UkgG8a6MNc= +github.com/fatih/structs v1.0.0 h1:BrX964Rv5uQ3wwS+KRUAJCBBw5PQmgJfJ6v4yly5QwU= +github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= +github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/getsentry/raven-go v0.0.0-20180405121644-d1470f50d3a3 h1:md1zEr2oSVWYNfQj+6TL/nmAFf5gY3Tp44lzskzK9QU= +github.com/getsentry/raven-go v0.0.0-20180405121644-d1470f50d3a3/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= +github.com/lib/pq 
v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394= +github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/mailru/easyjson v0.0.0-20180323154445-8b799c424f57 h1:qhv1ir3dIyOFmFU+5KqG4dF3zSQTA4nn1DFhu2NQC44= +github.com/mailru/easyjson v0.0.0-20180323154445-8b799c424f57/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks= +github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0= +github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8= +github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E= +github.com/nyaruka/ezconf v0.2.1 h1:TDXWoqjqYya1uhou1mAJZg7rgFYL98EB0Tb3+BWtUh0= +github.com/nyaruka/ezconf v0.2.1/go.mod h1:ey182kYkw2MIi4XiWe1FR/mzI33WCmTWuceDYYxgnQw= +github.com/olivere/elastic v6.1.14+incompatible h1:X7PDDou5+WuNrh5WgtS5+gKzbUmSNXvF0mQZ++VsZYU= +github.com/olivere/elastic v6.1.14+incompatible/go.mod h1:J+q1zQJTgAz9woqsbVRqGeB5G1iqDKVBWLNSYW8yfJ8= +github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.0.5 h1:8c8b5uO0zS4X6RPl/sd1ENwSkIc0/H2PaHxE3udaE8I= +github.com/sirupsen/logrus v1.0.5/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= +github.com/stretchr/testify v1.2.1 h1:52QO5WkIUcHGIR7EnGagH88x1bUzqGXTC5/1bDTUQ7U= +github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/crypto v0.0.0-20180322175230-88942b9c40a4 h1:AJCW0rhPjFKEAoValWpqnRKxX8YV0Xvqfw+dOexCTPc= +golang.org/x/crypto v0.0.0-20180322175230-88942b9c40a4/go.mod 
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/sys v0.0.0-20180326154331-13d03a9a82fb h1:w1eDrzbtlRsu1SyjnZZFUBVuhRN2Rn6DlrHCx9s4ud0= +golang.org/x/sys v0.0.0-20180326154331-13d03a9a82fb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +gopkg.in/airbrake/gobrake.v2 v2.0.9 h1:7z2uVWwn7oVeeugY1DtlPAy5H+KYgB1KeKTnqjNatLo= +gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= +gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2 h1:OAj3g0cR6Dx/R07QgQe8wkA9RNjB2u4i700xBkIT4e0= +gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo= diff --git a/indexer.go b/indexer.go index 680f592..fa51890 100644 --- a/indexer.go +++ b/indexer.go @@ -357,34 +357,34 @@ SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM ( ( SELECT jsonb_agg(f.value) FROM ( - select case - when value ? 'ward' - then jsonb_build_object( - 'ward_keyword', (regexp_matches(value ->> 'ward', '(.* > )?([^>]+)'))[2] - ) - else '{}' :: jsonb - end || district_value.value as value - FROM ( - select case - when value ? 'district' - then jsonb_build_object( - 'district_keyword', (regexp_matches(value ->> 'district', '(.* > )?([^>]+)'))[2] - ) - else '{}' :: jsonb - end || state_value.value as value - FROM ( - - select case - when value ? 'state' - then jsonb_build_object( - 'state_keyword', (regexp_matches(value ->> 'state', '(.* > )?([^>]+)'))[2] - ) - else '{}' :: jsonb - end || - jsonb_build_object('field', key) || value as value - from jsonb_each(contacts_contact.fields) - ) state_value - ) as district_value + select case + when value ? 'ward' + then jsonb_build_object( + 'ward_keyword', trim(substring(value ->> 'ward' from '(?!.* > )([\w ]+)')) + ) + else '{}' :: jsonb + end || district_value.value as value + FROM ( + select case + when value ? 
'district' + then jsonb_build_object( + 'district_keyword', trim(substring(value ->> 'district' from '(?!.* > )([\w ]+)')) + ) + else '{}' :: jsonb + end || state_value.value as value + FROM ( + + select case + when value ? 'state' + then jsonb_build_object( + 'state_keyword', trim(substring(value ->> 'state' from '(?!.* > )([\w ]+)')) + ) + else '{}' :: jsonb + end || + jsonb_build_object('field', key) || value as value + from jsonb_each(contacts_contact.fields) + ) state_value + ) as district_value ) as f ) as fields, ( @@ -397,7 +397,7 @@ SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM ( ) g ) as groups FROM contacts_contact - WHERE is_test = FALSE AND modified_on >= $1 + WHERE modified_on >= $1 ORDER BY modified_on ASC LIMIT 500000 ) t; @@ -408,9 +408,9 @@ const indexSettings = ` { "settings": { "index": { - "number_of_shards": 5, + "number_of_shards": 2, "number_of_replicas": 1, - "routing_partition_size": 3 + "routing_partition_size": 1 }, "analysis": { "analyzer": { @@ -433,7 +433,7 @@ const indexSettings = ` "tokenizer": "standard", "filter": [ "lowercase", - "prefix_filter" + "prefix_filter" ] }, "name_search": { @@ -443,7 +443,7 @@ const indexSettings = ` "lowercase", "max_length" ] - } + } }, "tokenizer": { "location_tokenizer": { @@ -465,7 +465,7 @@ const indexSettings = ` } }, "filter": { - "prefix_filter": { + "prefix_filter": { "type": "edge_ngram", "min_gram": 2, "max_gram": 8 @@ -474,7 +474,7 @@ const indexSettings = ` "type": "truncate", "length": 8 } - } + } } }, @@ -492,7 +492,6 @@ const indexSettings = ` }, "text": { "type": "keyword", - "ignore_above": 64, "normalizer": "lowercase" }, "number": { @@ -508,8 +507,7 @@ const indexSettings = ` }, "state_keyword": { "type": "keyword", - "normalizer": "lowercase", - "ignore_above": 64 + "normalizer": "lowercase" }, "district": { "type": "text", @@ -517,8 +515,7 @@ const indexSettings = ` }, "district_keyword": { "type": "keyword", - "normalizer": "lowercase", - "ignore_above": 64 + 
"normalizer": "lowercase" }, "ward": { "type": "text", @@ -526,8 +523,7 @@ const indexSettings = ` }, "ward_keyword": { "type": "keyword", - "normalizer": "lowercase", - "ignore_above": 64 + "normalizer": "lowercase" } } }, @@ -540,7 +536,6 @@ const indexSettings = ` "fields": { "keyword": { "type": "keyword", - "ignore_above": 64, "normalizer": "lowercase" } } @@ -569,7 +564,7 @@ const indexSettings = ` }, "modified_on_mu": { "type": "long" - }, + }, "name": { "type": "text", "analyzer": "prefix", @@ -577,7 +572,6 @@ const indexSettings = ` "fields": { "keyword": { "type": "keyword", - "ignore_above": 64, "normalizer": "lowercase" } } diff --git a/indexer_test.go b/indexer_test.go index 7e919fc..276355b 100644 --- a/indexer_test.go +++ b/indexer_test.go @@ -70,45 +70,45 @@ func TestIndexing(t *testing.T) { time.Sleep(2 * time.Second) - assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "JOHn"), []int64{5}) + assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "JOHn"), []int64{4}) // prefix on name matches both john and joanne, but no ajodi - assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "JO"), []int64{5, 7}) - assertQuery(t, client, physicalName, elastic.NewTermQuery("name.keyword", "JOHN DOE"), []int64{5}) + assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "JO"), []int64{4, 6}) + assertQuery(t, client, physicalName, elastic.NewTermQuery("name.keyword", "JOHN DOE"), []int64{4}) // can search on both first and last name boolQuery := elastic.NewBoolQuery().Must( elastic.NewMatchQuery("name", "john"), elastic.NewMatchQuery("name", "doe")) - assertQuery(t, client, physicalName, boolQuery, []int64{5}) + assertQuery(t, client, physicalName, boolQuery, []int64{4}) // can search on a long name - assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "Ajodinabiff"), []int64{6}) + assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "Ajodinabiff"), []int64{5}) assertQuery(t, 
client, physicalName, elastic.NewMatchQuery("language", "eng"), []int64{1}) // test contact, not indexed assertQuery(t, client, physicalName, elastic.NewMatchQuery("language", "fra"), []int64{}) - assertQuery(t, client, physicalName, elastic.NewMatchQuery("is_blocked", "true"), []int64{4}) - assertQuery(t, client, physicalName, elastic.NewMatchQuery("is_stopped", "true"), []int64{3}) + assertQuery(t, client, physicalName, elastic.NewMatchQuery("is_blocked", "true"), []int64{3}) + assertQuery(t, client, physicalName, elastic.NewMatchQuery("is_stopped", "true"), []int64{2}) - assertQuery(t, client, physicalName, elastic.NewMatchQuery("org_id", "1"), []int64{1, 3, 4, 5}) + assertQuery(t, client, physicalName, elastic.NewMatchQuery("org_id", "1"), []int64{1, 2, 3, 4}) // created_on range query - assertQuery(t, client, physicalName, elastic.NewRangeQuery("created_on").Gt("2017-01-01"), []int64{1, 7, 9}) + assertQuery(t, client, physicalName, elastic.NewRangeQuery("created_on").Gt("2017-01-01"), []int64{1, 6, 8}) // urn query query := elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("urns.scheme", "facebook"), elastic.NewMatchQuery("urns.path.keyword", "1000001"))) - assertQuery(t, client, physicalName, query, []int64{9}) + assertQuery(t, client, physicalName, query, []int64{8}) // urn substring query query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("urns.scheme", "tel"), elastic.NewMatchPhraseQuery("urns.path", "779"))) - assertQuery(t, client, physicalName, query, []int64{1, 3, 4, 7}) + assertQuery(t, client, physicalName, query, []int64{1, 2, 3, 6}) // urn substring query with more characters (77911) query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must( @@ -120,7 +120,7 @@ func TestIndexing(t *testing.T) { query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("urns.scheme", "tel"), elastic.NewMatchPhraseQuery("urns.path", "600055"))) - 
assertQuery(t, client, physicalName, query, []int64{6}) + assertQuery(t, client, physicalName, query, []int64{5}) // match a contact with multiple tel urns query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must( @@ -139,7 +139,7 @@ func TestIndexing(t *testing.T) { elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "17103bb1-1b48-4b70-92f7-1f6b73bd3488"), elastic.NewExistsQuery("fields.text")))) - assertQuery(t, client, physicalName, notQuery, []int64{3, 4, 5, 6, 7, 8, 9, 10}) + assertQuery(t, client, physicalName, notQuery, []int64{2, 3, 4, 5, 6, 7, 8, 9}) // no tokenizing of field text query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( @@ -151,24 +151,24 @@ func TestIndexing(t *testing.T) { query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "05bca1cd-e322-4837-9595-86d0d85e5adb"), elastic.NewRangeQuery("fields.number").Gt(10))) - assertQuery(t, client, physicalName, query, []int64{3}) + assertQuery(t, client, physicalName, query, []int64{2}) // datetime field range query query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "e0eac267-463a-4c00-9732-cab62df07b16"), elastic.NewRangeQuery("fields.datetime").Lt(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)))) - assertQuery(t, client, physicalName, query, []int64{4}) + assertQuery(t, client, physicalName, query, []int64{3}) // state query query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "22d11697-edba-4186-b084-793e3b876379"), elastic.NewMatchPhraseQuery("fields.state", "washington"))) - assertQuery(t, client, physicalName, query, []int64{6}) + assertQuery(t, client, physicalName, query, []int64{5}) query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "22d11697-edba-4186-b084-793e3b876379"), 
elastic.NewMatchQuery("fields.state_keyword", " washington"))) - assertQuery(t, client, physicalName, query, []int64{6}) + assertQuery(t, client, physicalName, query, []int64{5}) // doesn't include country query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( @@ -185,29 +185,29 @@ func TestIndexing(t *testing.T) { query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "fcab2439-861c-4832-aa54-0c97f38f24ab"), elastic.NewMatchPhraseQuery("fields.district", "king"))) - assertQuery(t, client, physicalName, query, []int64{8, 10}) + assertQuery(t, client, physicalName, query, []int64{7, 9}) // phrase matches all query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "fcab2439-861c-4832-aa54-0c97f38f24ab"), - elastic.NewMatchPhraseQuery("fields.district", "King County"))) - assertQuery(t, client, physicalName, query, []int64{8}) + elastic.NewMatchPhraseQuery("fields.district", "King Côunty"))) + assertQuery(t, client, physicalName, query, []int64{7}) query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "fcab2439-861c-4832-aa54-0c97f38f24ab"), - elastic.NewMatchQuery("fields.district_keyword", "King County"))) - assertQuery(t, client, physicalName, query, []int64{8}) + elastic.NewMatchQuery("fields.district_keyword", "King Côunty"))) + assertQuery(t, client, physicalName, query, []int64{7}) // ward query query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "a551ade4-e5a0-4d83-b185-53b515ad2f2a"), elastic.NewMatchPhraseQuery("fields.ward", "district"))) - assertQuery(t, client, physicalName, query, []int64{9}) + assertQuery(t, client, physicalName, query, []int64{8}) query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( elastic.NewMatchQuery("fields.field", "a551ade4-e5a0-4d83-b185-53b515ad2f2a"), 
elastic.NewMatchQuery("fields.ward_keyword", "central district"))) - assertQuery(t, client, physicalName, query, []int64{9}) + assertQuery(t, client, physicalName, query, []int64{8}) // no substring though on keyword query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must( @@ -217,7 +217,7 @@ func TestIndexing(t *testing.T) { // group query assertQuery(t, client, physicalName, elastic.NewMatchQuery("groups", "4ea0f313-2f62-4e57-bdf0-232b5191dd57"), []int64{1}) - assertQuery(t, client, physicalName, elastic.NewMatchQuery("groups", "529bac39-550a-4d6f-817c-1833f3449007"), []int64{1, 3}) + assertQuery(t, client, physicalName, elastic.NewMatchQuery("groups", "529bac39-550a-4d6f-817c-1833f3449007"), []int64{1, 2}) assertQuery(t, client, physicalName, elastic.NewMatchQuery("groups", "4c016340-468d-4675-a974-15cb7a45a5ab"), []int64{}) lastModified, err := GetLastModified(elasticURL, physicalName) @@ -230,7 +230,7 @@ func TestIndexing(t *testing.T) { time.Sleep(5 * time.Second) // try a test query to check it worked - assertQuery(t, client, indexName, elastic.NewMatchQuery("name", "john"), []int64{5}) + assertQuery(t, client, indexName, elastic.NewMatchQuery("name", "john"), []int64{4}) // look up our mapping physical := FindPhysicalIndexes(elasticURL, indexName) @@ -265,7 +265,7 @@ func TestIndexing(t *testing.T) { assert.Equal(t, resp.StatusCode, http.StatusNotFound) // new index still works - assertQuery(t, client, newIndex, elastic.NewMatchQuery("name", "john"), []int64{5}) + assertQuery(t, client, newIndex, elastic.NewMatchQuery("name", "john"), []int64{4}) // update our database, removing one contact, updating another dbUpdate, err := ioutil.ReadFile("testdb_update.sql") @@ -281,7 +281,7 @@ func TestIndexing(t *testing.T) { time.Sleep(5 * time.Second) // should only match new john, old john is gone - assertQuery(t, client, indexName, elastic.NewMatchQuery("name", "john"), []int64{3}) + assertQuery(t, client, indexName, elastic.NewMatchQuery("name", 
"john"), []int64{2}) // 3 is no longer in our group assertQuery(t, client, indexName, elastic.NewMatchQuery("groups", "529bac39-550a-4d6f-817c-1833f3449007"), []int64{1}) diff --git a/testdb.sql b/testdb.sql index 6422980..7abd916 100644 --- a/testdb.sql +++ b/testdb.sql @@ -91,37 +91,35 @@ ALTER SEQUENCE contacts_contactgroup_contacts_id_seq OWNED BY contacts_contactgr INSERT INTO contacts_contact(id, is_active, created_by_id, created_on, modified_by_id, modified_on, org_id, is_blocked, name, is_test, language, uuid, is_stopped, fields) VALUES (1, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', 1, FALSE, NULL, FALSE, 'eng', 'c7a2dd87-a80e-420b-8431-ca48d422e924', FALSE, '{ "17103bb1-1b48-4b70-92f7-1f6b73bd3488": {"text": "the rock"}}'), -(2, TRUE, -1, '2015-03-25 17:15:12.982168+00', -1, '2015-03-25 17:15:12.982168+00', 1, FALSE, NULL, TRUE, 'fra', '1ad43adc-c4fc-4244-8b3d-a938b8eba57a', FALSE, NULL), -(3, TRUE, -1, '2015-03-26 10:07:14.054521+00', -1, '2015-03-26 10:07:14.054521+00', 1, FALSE, NULL, FALSE, NULL, '7a6606c7-ff41-4203-aa98-454a10d37209', TRUE, +(2, TRUE, -1, '2015-03-26 10:07:14.054521+00', -1, '2015-03-26 10:07:14.054521+00', 1, FALSE, NULL, FALSE, NULL, '7a6606c7-ff41-4203-aa98-454a10d37209', TRUE, '{ "05bca1cd-e322-4837-9595-86d0d85e5adb": {"text": "11", "number": 11 }}'), -(4, TRUE, -1, '2015-03-26 13:04:58.699648+00', -1, '2015-03-26 13:04:58.699648+00', 1, TRUE, NULL, FALSE, NULL, '29b45297-15ad-4061-a7d4-e0b33d121541', FALSE, +(3, TRUE, -1, '2015-03-26 13:04:58.699648+00', -1, '2015-03-26 13:04:58.699648+00', 1, TRUE, NULL, FALSE, NULL, '29b45297-15ad-4061-a7d4-e0b33d121541', FALSE, '{ "05bca1cd-e322-4837-9595-86d0d85e5adb": {"text": "9", "number": 9 }, "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2018-04-06T18:37:59+00:00", "datetime": "2018-04-06T18:37:59+00:00"}}'), -(5, TRUE, -1, '2015-03-27 07:39:28.955051+00', -1, '2015-03-27 07:39:28.955051+00', 1, FALSE, 'John Doe', FALSE, NULL, 
'51762bba-01a2-4c4e-b5cd-b182d0405cd4', FALSE, +(4, TRUE, -1, '2015-03-27 07:39:28.955051+00', -1, '2015-03-27 07:39:28.955051+00', 1, FALSE, 'John Doe', FALSE, NULL, '51762bba-01a2-4c4e-b5cd-b182d0405cd4', FALSE, '{ "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2030-04-06T18:37:59+00:00", "datetime": "2030-04-06T18:37:59+00:00"}}'), -(6, TRUE, -1, '2015-10-30 19:42:27.001837+00', -1, '2015-10-30 19:42:27.001837+00', 2, FALSE, 'Ajodinabiff Dane', FALSE, NULL, '3e814add-e614-41f7-8b5d-a07f670a698f', FALSE, +(5, TRUE, -1, '2015-10-30 19:42:27.001837+00', -1, '2015-10-30 19:42:27.001837+00', 2, FALSE, 'Ajodinabiff Dane', FALSE, NULL, '3e814add-e614-41f7-8b5d-a07f670a698f', FALSE, '{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Washington", "state": "USA > Washington"} }'), -(7, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', 2, FALSE, 'Joanne Stone', FALSE, NULL, '7051dff0-0a27-49d7-af1f-4494239139e6', FALSE, +(6, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', 2, FALSE, 'Joanne Stone', FALSE, NULL, '7051dff0-0a27-49d7-af1f-4494239139e6', FALSE, '{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Colorado", "state": "USA > Colorado"} }'), -(8, TRUE, -1, '2015-03-27 13:39:43.995812+00', -1, '2015-03-27 13:39:43.995812+00', 2, FALSE, NULL, FALSE, NULL, 'b46f6e18-95b4-4984-9926-dded047f4eb3', FALSE, -'{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Washington > King County", "district": "USA > Washington > King County"} }'), -(9, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', 2, FALSE, NULL, FALSE, NULL, '9195c8b7-6138-4d84-ac56-5192cc3d8ceb', FALSE, -'{ "a551ade4-e5a0-4d83-b185-53b515ad2f2a": { "text": "USA > Washington > King County > Central District", "ward": "USA > Washington > King County > Central District"} }'), -(10, TRUE, -1, '2016-08-22 14:20:05.690311+00', -1, '2016-08-22 14:20:05.690311+00', 2, FALSE, NULL, FALSE, NULL, 
'2b8bd28d-43e0-4c34-a4bb-0f10b11fdb8a', FALSE, +(7, TRUE, -1, '2015-03-27 13:39:43.995812+00', -1, '2015-03-27 13:39:43.995812+00', 2, FALSE, NULL, FALSE, NULL, 'b46f6e18-95b4-4984-9926-dded047f4eb3', FALSE, +'{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Washington > King Côunty", "district": "USA > Washington > King Côunty"} }'), +(8, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', 2, FALSE, NULL, FALSE, NULL, '9195c8b7-6138-4d84-ac56-5192cc3d8ceb', FALSE, +'{ "a551ade4-e5a0-4d83-b185-53b515ad2f2a": { "text": "USA > Washington > King Côunty > Central District", "ward": "USA > Washington > King Côunty > Central District"} }'), +(9, TRUE, -1, '2016-08-22 14:20:05.690311+00', -1, '2016-08-22 14:20:05.690311+00', 2, FALSE, NULL, FALSE, NULL, '2b8bd28d-43e0-4c34-a4bb-0f10b11fdb8a', FALSE, '{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Colorado > King", "district": "USA > Colorado > King"} }'); INSERT INTO contacts_contacturn(id, contact_id, scheme, org_id, priority, path, display, identity) VALUES (1, 1, 'tel', 1, 50, '+12067791111', NULL, 'tel:+12067791111'), (2, 1, 'tel', 1, 50, '+12067792222', NULL, 'tel:+12067792222'), -(3, 2, 'tel', 1, 50, '+12067793333', NULL, 'tel:+12067793333'), -(4, 3, 'tel', 1, 50, '+12067794444', NULL, 'tel:+12067794444'), -(5, 4, 'tel', 1, 50, '+12067795555', NULL, 'tel:+12067795555'), -(6, 5, 'tel', 1, 50, '+12060000556', NULL, 'tel:+12067796666'), -(7, 6, 'tel', 2, 50, '+12060005577', NULL, 'tel:+12067797777'), -(8, 7, 'tel', 2, 50, '+12067798888', NULL, 'tel:+12067798888'), -(9, 8, 'viber', 2, 90, 'viberpath==', NULL, 'viber:viberpath=='), -(10, 9, 'facebook', 2, 90, 1000001, 'funguy', 'facebook:1000001'), -(11, 10, 'twitterid', 2, 90, 1000001, 'fungal', 'twitterid:1000001'), -(12, 11, 'whatsapp', 2, 90, 1000003, NULL, 'whatsapp:1000003'); +(3, 2, 'tel', 1, 50, '+12067794444', NULL, 'tel:+12067794444'), +(4, 3, 'tel', 1, 50, '+12067795555', NULL, 'tel:+12067795555'), +(5, 4, 
'tel', 1, 50, '+12060000556', NULL, 'tel:+12067796666'), +(6, 5, 'tel', 2, 50, '+12060005577', NULL, 'tel:+12067797777'), +(7, 6, 'tel', 2, 50, '+12067798888', NULL, 'tel:+12067798888'), +(8, 7, 'viber', 2, 90, 'viberpath==', NULL, 'viber:viberpath=='), +(9, 8, 'facebook', 2, 90, 1000001, 'funguy', 'facebook:1000001'), +(10, 9, 'twitterid', 2, 90, 1000001, 'fungal', 'twitterid:1000001'), +(11, 10, 'whatsapp', 2, 90, 1000003, NULL, 'whatsapp:1000003'); INSERT INTO contacts_contactgroup(id, uuid, name) VALUES (1, '4ea0f313-2f62-4e57-bdf0-232b5191dd57', 'Group 1'), @@ -131,6 +129,5 @@ INSERT INTO contacts_contactgroup(id, uuid, name) VALUES INSERT INTO contacts_contactgroup_contacts(id, contact_id, contactgroup_id) VALUES (1, 1, 1), -(2, 2, 1), -(3, 1, 4), -(4, 3, 4); +(2, 1, 4), +(3, 2, 4); diff --git a/testdb_update.sql b/testdb_update.sql index af2cbf0..e551846 100644 --- a/testdb_update.sql +++ b/testdb_update.sql @@ -1,8 +1,8 @@ -- update one of our contacts -DELETE FROM contacts_contactgroup_contacts WHERE id = 4; -UPDATE contacts_contact SET name = 'John Deer', modified_on = '2020-08-20 14:00:00+00' where id = 3; +DELETE FROM contacts_contactgroup_contacts WHERE id = 3; +UPDATE contacts_contact SET name = 'John Deer', modified_on = '2020-08-20 14:00:00+00' where id = 2; -- delete one of our others -UPDATE contacts_contact SET is_active = FALSE, modified_on = '2020-08-22 15:00:00+00' where id = 5; +UPDATE contacts_contact SET is_active = FALSE, modified_on = '2020-08-22 15:00:00+00' where id = 4;