From 4017c2b4c965f0af2f3f90e9212652c76a645c2f Mon Sep 17 00:00:00 2001 From: Rhys St Romaine Date: Wed, 23 Oct 2024 15:33:26 +0100 Subject: [PATCH 1/4] Add special characters to whitelist --- api/search.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/search.go b/api/search.go index 2e39af36..120de079 100644 --- a/api/search.go +++ b/api/search.go @@ -835,7 +835,7 @@ func sanitiseDoubleQuotes(str string) string { } func checkForSpecialCharacters(str string) bool { - re := regexp.MustCompile("[[:^ascii:]]") + re := regexp.MustCompile("[[:^ascii:]&&[^–‘’]]") return re.MatchString(str) } From 58e9ebba1450eca4700d3f67f55dcf776921f186 Mon Sep 17 00:00:00 2001 From: Rhys St Romaine Date: Wed, 23 Oct 2024 16:02:22 +0100 Subject: [PATCH 2/4] Update special character regex --- api/search.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/search.go b/api/search.go index 120de079..2350d7c9 100644 --- a/api/search.go +++ b/api/search.go @@ -835,7 +835,7 @@ func sanitiseDoubleQuotes(str string) string { } func checkForSpecialCharacters(str string) bool { - re := regexp.MustCompile("[[:^ascii:]&&[^–‘’]]") + re := regexp.MustCompile("[^[:ascii:]–‘’]") return re.MatchString(str) } From 19b4e67955227ea007a9d24581f747a920e9db47 Mon Sep 17 00:00:00 2001 From: Rhys St Romaine Date: Wed, 23 Oct 2024 16:02:45 +0100 Subject: [PATCH 3/4] Update tests --- api/search_test.go | 6 ++++++ features/search.feature | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/api/search_test.go b/api/search_test.go index d73bc031..cf66efd0 100644 --- a/api/search_test.go +++ b/api/search_test.go @@ -106,6 +106,12 @@ func TestCheckForSpecialCharacters(t *testing.T) { c.So(actual, c.ShouldEqual, expected) }) + c.Convey("A string containing whitelisted special characters should return false", t, func() { + expected := false + actual := checkForSpecialCharacters("Test string –‘’") + c.So(actual, c.ShouldEqual, expected) + }) + c.Convey("A string containing special characters should return true", t, func() { expected := true actual := checkForSpecialCharacters("Test 怎么开 string") diff --git a/features/search.feature b/features/search.feature index ad26ab3f..7f9abd11 100644 --- a/features/search.feature +++ b/features/search.feature @@ -57,3 +57,11 @@ Feature: Search endpoint should return data for requested search parameter """ invalid URI prefix parameter """ + + Scenario: When Searching with whitelisted special characters I get the expected results + Given elasticsearch is healthy + And elasticsearch returns one item in search response + When I GET "/search?q=CPI–‘’" + Then the HTTP status code should be "200" + And the response header "Content-Type" should be "application/json;charset=utf-8" + And the response body is the same as the json in "./features/testdata/expected_single_search_result.json" From eed9944c5283e48e35960355d87f2eec1d399e73 Mon Sep 17 00:00:00 2001 From: Rhys St Romaine Date: Thu, 24 Oct 2024 08:56:32 +0100 Subject: [PATCH 4/4] Update special character regex and whitelist --- api/search.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/search.go b/api/search.go index 2350d7c9..ba2bfc5d 100644 --- a/api/search.go +++ b/api/search.go @@ -71,6 +71,9 @@ var defaultContentTypes = []string{ "timeseries_dataset", } +// contains the special characters that are allowed in query validation +const AllowedSpecialCharacters = "–‘’" + type URIsRequest struct { URIs []string `json:"uris"` Limit int `json:"limit,omitempty"` // Limit is optional @@ -835,7 +838,7 @@ func sanitiseDoubleQuotes(str string) string { } func checkForSpecialCharacters(str string) bool { - re := regexp.MustCompile("[^[:ascii:]–‘’]") + re := regexp.MustCompile(fmt.Sprintf("[^[:ascii:]%s]", regexp.QuoteMeta(AllowedSpecialCharacters))) return re.MatchString(str) }