From db54150e71e81662b8d2c17d32f2ee50be6bdf5e Mon Sep 17 00:00:00 2001
From: Rowan Seymour
Date: Fri, 4 Jun 2021 12:35:53 -0500
Subject: [PATCH] Fix creating of location keyword fields when values have punctuation

---

Notes:
  The *_keyword capture group changes from ([\w ]+) to ([^>]+), so the
  segment after the last " > " separator is no longer limited to word
  characters and spaces. The test fixture renames "King Côunty" to
  "King-Côunty" so the indexer tests exercise a value containing punctuation.

  NOTE(review): this copy of the patch was restored from a
  whitespace-collapsed paste. Line breaks were checked against the @@ hunk
  counts, but leading indentation and the position of blank context lines
  are reconstructed - apply with `git apply --ignore-whitespace` or verify
  against the upstream commit.

 indexer.go      |  6 +++---
 indexer_test.go |  4 ++--
 testdb.sql      | 54 ++++++++++++++++++++++++++++++++-----------------
 3 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/indexer.go b/indexer.go
index 7d0ea67..8e62daa 100644
--- a/indexer.go
+++ b/indexer.go
@@ -399,7 +399,7 @@ SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM (
 				select
 					case
 						when value ? 'ward' then jsonb_build_object(
-							'ward_keyword', trim(substring(value ->> 'ward' from '(?!.* > )([\w ]+)'))
+							'ward_keyword', trim(substring(value ->> 'ward' from '(?!.* > )([^>]+)'))
 						)
 						else '{}' :: jsonb
 					end || district_value.value as value
@@ -407,7 +407,7 @@ SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM (
 					select
 						case
 							when value ? 'district' then jsonb_build_object(
-								'district_keyword', trim(substring(value ->> 'district' from '(?!.* > )([\w ]+)'))
+								'district_keyword', trim(substring(value ->> 'district' from '(?!.* > )([^>]+)'))
 							)
 							else '{}' :: jsonb
 						end || state_value.value as value
@@ -416,7 +416,7 @@ SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM (
 						select
 							case
 								when value ? 'state' then jsonb_build_object(
-									'state_keyword', trim(substring(value ->> 'state' from '(?!.* > )([\w ]+)'))
+									'state_keyword', trim(substring(value ->> 'state' from '(?!.* > )([^>]+)'))
 								)
 								else '{}' :: jsonb
 							end ||
diff --git a/indexer_test.go b/indexer_test.go
index 41d2a8f..9ce9818 100644
--- a/indexer_test.go
+++ b/indexer_test.go
@@ -202,12 +202,12 @@ func TestIndexing(t *testing.T) {
 	// phrase matches all
 	query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
 		elastic.NewMatchQuery("fields.field", "fcab2439-861c-4832-aa54-0c97f38f24ab"),
-		elastic.NewMatchPhraseQuery("fields.district", "King Côunty")))
+		elastic.NewMatchPhraseQuery("fields.district", "King-Côunty")))
 	assertQuery(t, client, physicalName, query, []int64{7})
 
 	query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
 		elastic.NewMatchQuery("fields.field", "fcab2439-861c-4832-aa54-0c97f38f24ab"),
-		elastic.NewMatchQuery("fields.district_keyword", "King Côunty")))
+		elastic.NewMatchQuery("fields.district_keyword", "King-Côunty")))
 	assertQuery(t, client, physicalName, query, []int64{7})
 
 	// ward query
diff --git a/testdb.sql b/testdb.sql
index 76bb79c..5914f8b 100644
--- a/testdb.sql
+++ b/testdb.sql
@@ -88,24 +88,42 @@ ALTER SEQUENCE contacts_contactgroup_contacts_id_seq OWNED BY contacts_contactgr
 -- a551ade4-e5a0-4d83-b185-53b515ad2f2a - home_ward (ward)
 
 INSERT INTO contacts_contact(id, is_active, created_by_id, created_on, modified_by_id, modified_on, last_seen_on, org_id, status, name, language, uuid, fields) VALUES
-(1, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', '2020-08-04 21:11', 1, 'A', NULL, 'eng', 'c7a2dd87-a80e-420b-8431-ca48d422e924',
-'{ "17103bb1-1b48-4b70-92f7-1f6b73bd3488": {"text": "the rock"}}'),
-(2, TRUE, -1, '2015-03-26 10:07:14.054521+00', -1, '2015-03-26 10:07:14.054521+00', '2020-08-03 13:11', 1, 'S', NULL, NULL, '7a6606c7-ff41-4203-aa98-454a10d37209',
-'{ "05bca1cd-e322-4837-9595-86d0d85e5adb": {"text": "11", "number": 11 }}'),
-(3, TRUE, -1, '2015-03-26 13:04:58.699648+00', -1, '2015-03-26 13:04:58.699648+00', '2018-05-04 21:11', 1, 'B', NULL, NULL, '29b45297-15ad-4061-a7d4-e0b33d121541',
-'{ "05bca1cd-e322-4837-9595-86d0d85e5adb": {"text": "9", "number": 9 }, "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2018-04-06T18:37:59+00:00", "datetime": "2018-04-06T18:37:59+00:00"}}'),
-(4, TRUE, -1, '2015-03-27 07:39:28.955051+00', -1, '2015-03-27 07:39:28.955051+00', '2015-12-31 23:59', 1, 'A', 'John Doe', NULL, '51762bba-01a2-4c4e-b5cd-b182d0405cd4',
-'{ "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2030-04-06T18:37:59+00:00", "datetime": "2030-04-06T18:37:59+00:00"}}'),
-(5, TRUE, -1, '2015-10-30 19:42:27.001837+00', -1, '2015-10-30 19:42:27.001837+00', '2020-08-04 21:11', 2, 'A', 'Ajodinabiff Dane', NULL, '3e814add-e614-41f7-8b5d-a07f670a698f',
-'{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Washington", "state": "USA > Washington"} }'),
-(6, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', '2020-08-04 21:00', 2, 'A', 'Joanne Stone', NULL, '7051dff0-0a27-49d7-af1f-4494239139e6',
-'{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Colorado", "state": "USA > Colorado"} }'),
-(7, TRUE, -1, '2015-03-27 13:39:43.995812+00', -1, '2015-03-27 13:39:43.995812+00', NULL, 2, 'A', NULL, NULL, 'b46f6e18-95b4-4984-9926-dded047f4eb3',
-'{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Washington > King Côunty", "district": "USA > Washington > King Côunty"} }'),
-(8, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', NULL, 2, 'A', NULL, NULL, '9195c8b7-6138-4d84-ac56-5192cc3d8ceb',
-'{ "a551ade4-e5a0-4d83-b185-53b515ad2f2a": { "text": "USA > Washington > King Côunty > Central District", "ward": "USA > Washington > King Côunty > Central District"} }'),
-(9, TRUE, -1, '2016-08-22 14:20:05.690311+00', -1, '2016-08-22 14:20:05.690311+00', NULL, 2, 'A', NULL, NULL, '2b8bd28d-43e0-4c34-a4bb-0f10b11fdb8a',
-'{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Colorado > King", "district": "USA > Colorado > King"} }');
+(
+    1, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', '2020-08-04 21:11', 1, 'A', NULL, 'eng', 'c7a2dd87-a80e-420b-8431-ca48d422e924',
+    '{ "17103bb1-1b48-4b70-92f7-1f6b73bd3488": {"text": "the rock"}}'
+),
+(
+    2, TRUE, -1, '2015-03-26 10:07:14.054521+00', -1, '2015-03-26 10:07:14.054521+00', '2020-08-03 13:11', 1, 'S', NULL, NULL, '7a6606c7-ff41-4203-aa98-454a10d37209',
+    '{ "05bca1cd-e322-4837-9595-86d0d85e5adb": {"text": "11", "number": 11 }}'
+),
+(
+    3, TRUE, -1, '2015-03-26 13:04:58.699648+00', -1, '2015-03-26 13:04:58.699648+00', '2018-05-04 21:11', 1, 'B', NULL, NULL, '29b45297-15ad-4061-a7d4-e0b33d121541',
+    '{ "05bca1cd-e322-4837-9595-86d0d85e5adb": {"text": "9", "number": 9 }, "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2018-04-06T18:37:59+00:00", "datetime": "2018-04-06T18:37:59+00:00"}}'
+),
+(
+    4, TRUE, -1, '2015-03-27 07:39:28.955051+00', -1, '2015-03-27 07:39:28.955051+00', '2015-12-31 23:59', 1, 'A', 'John Doe', NULL, '51762bba-01a2-4c4e-b5cd-b182d0405cd4',
+    '{ "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2030-04-06T18:37:59+00:00", "datetime": "2030-04-06T18:37:59+00:00"}}'
+),
+(
+    5, TRUE, -1, '2015-10-30 19:42:27.001837+00', -1, '2015-10-30 19:42:27.001837+00', '2020-08-04 21:11', 2, 'A', 'Ajodinabiff Dane', NULL, '3e814add-e614-41f7-8b5d-a07f670a698f',
+    '{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Washington", "state": "USA > Washington"} }'
+),
+(
+    6, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', '2020-08-04 21:00', 2, 'A', 'Joanne Stone', NULL, '7051dff0-0a27-49d7-af1f-4494239139e6',
+    '{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Colorado", "state": "USA > Colorado"} }'
+),
+(
+    7, TRUE, -1, '2015-03-27 13:39:43.995812+00', -1, '2015-03-27 13:39:43.995812+00', NULL, 2, 'A', NULL, NULL, 'b46f6e18-95b4-4984-9926-dded047f4eb3',
+    '{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Washington > King-Côunty", "district": "USA > Washington > King-Côunty"} }'
+),
+(
+    8, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', NULL, 2, 'A', NULL, NULL, '9195c8b7-6138-4d84-ac56-5192cc3d8ceb',
+    '{ "a551ade4-e5a0-4d83-b185-53b515ad2f2a": { "text": "USA > Washington > King-Côunty > Central District", "ward": "USA > Washington > King-Côunty > Central District"} }'
+),
+(
+    9, TRUE, -1, '2016-08-22 14:20:05.690311+00', -1, '2016-08-22 14:20:05.690311+00', NULL, 2, 'A', NULL, NULL, '2b8bd28d-43e0-4c34-a4bb-0f10b11fdb8a',
+    '{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Colorado > King", "district": "USA > Colorado > King"} }'
+);
 
 INSERT INTO contacts_contacturn(id, contact_id, scheme, org_id, priority, path, display, identity) VALUES
 (1, 1, 'tel', 1, 50, '+12067791111', NULL, 'tel:+12067791111'),