diff --git a/src/Aws/CloudSearch/RequestParameter/Query/Builder/LuceneQueryParametersBuilder.php b/src/Aws/CloudSearch/RequestParameter/Query/Builder/LuceneQueryParametersBuilder.php
new file mode 100644
index 0000000..5b8b554
--- /dev/null
+++ b/src/Aws/CloudSearch/RequestParameter/Query/Builder/LuceneQueryParametersBuilder.php
@@ -0,0 +1,172 @@
+     */
+    private array $searchByFields = [];
+
+    /**
+     * @param string $rawQuery Raw search phrase the Lucene query is built from.
+     */
+    public function __construct(string $rawQuery)
+    {
+        $this->rawQuery = $rawQuery;
+    }
+
+    /**
+     * Named constructor; equivalent to `new self($query)`.
+     */
+    public static function forRawQuery(string $query): self
+    {
+        return new self($query);
+    }
+
+    /**
+     * Makes build() emit an EXACT_MATCH_PATTERN clause for every word of the raw query.
+     */
+    public function useExactMatchingForIndividualWords(): self
+    {
+        $this->shouldUseExactMatchingForIndividualWords = true;
+
+        return $this;
+    }
+
+    /**
+     * Makes build() emit a WILDCARD_MATCH_PATTERN clause for every word of the raw query.
+     */
+    public function useWildcardMatching(): self
+    {
+        $this->shouldUseWildcardMatching = true;
+
+        return $this;
+    }
+
+    /**
+     * Restricts the generated clauses to the given fields.
+     *
+     * Each call replaces the previously configured field list.
+     */
+    public function searchByFields(string ...$fields): self
+    {
+        $this->searchByFields = $fields;
+
+        return $this;
+    }
+
+    /**
+     * Builds the Lucene query parameters from the configured options.
+     *
+     * With neither matching mode enabled, the raw query is passed through with only
+     * backslashes escaped. Otherwise the raw query is split on spaces; for every word
+     * the enabled match clauses are OR-ed together, and the per-word groups are AND-ed.
+     */
+    public function build(): QueryParameters
+    {
+        if (false === $this->shouldUseWildcardMatching && false === $this->shouldUseExactMatchingForIndividualWords) {
+            return QueryParameters::usingLucene($this->escapeString($this->rawQuery));
+        }
+
+        $queryParts = [];
+        $queryTokens = $this->filterOutEmptyElements(explode(' ', $this->rawQuery));
+
+        foreach ($queryTokens as $queryToken) {
+            $queryTokenParts = [];
+
+            if (true === $this->shouldUseExactMatchingForIndividualWords) {
+                $escapedQueryToken = $this->escapeString($queryToken);
+                $queryTokenParts[] = $this->buildQueryForMatchPattern($escapedQueryToken, self::EXACT_MATCH_PATTERN);
+            }
+
+            if (true === $this->shouldUseWildcardMatching) {
+                // May yield an empty string (word made only of special characters);
+                // joinPartsWithOperator() drops such empty parts.
+                $queryTokenParts[] = $this->buildQueryPartWithWildcard($queryToken);
+            }
+
+            $queryParts[] = $this->joinPartsWithOperator($queryTokenParts, self::OPERATOR_OR);
+        }
+
+        return QueryParameters::usingLucene($this->joinPartsWithOperator($queryParts, self::OPERATOR_AND));
+    }
+
+    /**
+     * Applies a match pattern to a single token, once per configured search field.
+     *
+     * @param string $queryToken   Token substituted into the pattern (passed as a sprintf argument).
+     * @param string $matchPattern sprintf pattern with one placeholder for the token.
+     *
+     * @return string The bare clause when no fields are configured, otherwise the per-field clauses OR-ed.
+     */
+    private function buildQueryForMatchPattern(string $queryToken, string $matchPattern): string
+    {
+        if (empty($this->searchByFields)) {
+            return sprintf($matchPattern, $queryToken);
+        }
+
+        $matchPatternWithField = sprintf(
+            '%s %s',
+            self::SEARCH_FIELD_PATTERN,
+            $matchPattern
+        );
+        $queryTokenParts = array_map(
+            static fn (string $searchField): string => sprintf($matchPatternWithField, $searchField, $queryToken),
+            $this->searchByFields
+        );
+
+        return $this->joinPartsWithOperator($queryTokenParts, self::OPERATOR_OR);
+    }
+
+    /**
+     * Joins the non-empty parts with the given boolean operator.
+     *
+     * The result is wrapped in parentheses only when more than one part remains.
+     * Any operator other than OPERATOR_OR is rendered as AND.
+     *
+     * @param string[] $queryParts
+     */
+    private function joinPartsWithOperator(array $queryParts, string $operator): string
+    {
+        $queryParts = $this->filterOutEmptyElements($queryParts);
+
+        return sprintf(
+            count($queryParts) > 1 ? '(%s)' : '%s',
+            implode(
+                self::OPERATOR_OR === $operator ? ' OR ' : ' AND ',
+                $queryParts
+            )
+        );
+    }
+
+    /**
+     * Builds the wildcard clause(s) for a single space-free token.
+     *
+     * @return string The wildcard clause, or an empty string when the token consists
+     *                solely of special characters.
+     */
+    private function buildQueryPartWithWildcard(string $queryToken): string
+    {
+        $hasAnyOfTheSpecialCharacters = strpbrk($queryToken, implode('', self::SPECIAL_CHARACTERS));
+
+        if (false === $hasAnyOfTheSpecialCharacters) {
+            return $this->buildQueryForMatchPattern($queryToken, self::WILDCARD_MATCH_PATTERN);
+        }
+
+        // Wildcard query is not tokenized on CloudSearch side, so we need to do it here to get accurate results.
+        // There are no spaces in $queryToken at this point, so only special characters are used to tokenize it.
+        // More details about this case:
+        // https://jaygurnaniblog.wordpress.com/2017/05/03/lucene-parsing-engine-in-aws-with-special-characters/
+        $wildcardQueryTokens = explode(
+            ' ',
+            str_replace(self::SPECIAL_CHARACTERS, ' ', $queryToken)
+        );
+        $wildcardQueryTokens = $this->filterOutEmptyElements($wildcardQueryTokens);
+
+        if (empty($wildcardQueryTokens)) {
+            return '';
+        }
+
+        $wildcardQuery = implode(
+            ' AND ',
+            array_map(
+                static fn (string $wildcardQueryToken): string => sprintf(
+                    self::WILDCARD_MATCH_PATTERN,
+                    $wildcardQueryToken
+                ),
+                $wildcardQueryTokens
+            )
+        );
+
+        if (count($this->searchByFields) > 0) {
+            // $wildcardQuery is user-derived text that becomes part of a sprintf() format
+            // string below, so every literal "%" has to be doubled; otherwise it would be
+            // parsed as a conversion specifier (error or corrupted query).
+            $matchPatternWithField = sprintf(
+                '%s (%s)',
+                self::SEARCH_FIELD_PATTERN,
+                str_replace('%', '%%', $wildcardQuery)
+            );
+            $queryTokenParts = array_map(
+                static fn (string $searchField): string => sprintf($matchPatternWithField, $searchField),
+                $this->searchByFields
+            );
+
+            return $this->joinPartsWithOperator($queryTokenParts, self::OPERATOR_OR);
+        }
+
+        return $wildcardQuery;
+    }
+
+    /**
+     * Escapes backslashes by doubling them; every other character passes through unchanged.
+     *
+     * NOTE(review): double quotes are not escaped, so a token containing `"` ends up
+     * verbatim inside an exact-match phrase - confirm whether that is intended.
+     */
+    private function escapeString(string $queryToken): string
+    {
+        return str_replace('\\', '\\\\', $queryToken);
+    }
+
+    /**
+     * Removes elements that are empty or whitespace-only. Original array keys are preserved.
+     *
+     * @param string[] $elements
+     *
+     * @return string[]
+     */
+    private function filterOutEmptyElements(array $elements): array
+    {
+        return array_filter($elements, static fn (string $queryToken): bool => '' !== trim($queryToken));
+    }
+}
diff --git a/tests/Aws/CloudSearch/RequestParameter/Query/Builder/LuceneQueryParametersBuilderTest.php b/tests/Aws/CloudSearch/RequestParameter/Query/Builder/LuceneQueryParametersBuilderTest.php
new file mode 100644
index 0000000..e0cf8cd
--- /dev/null
+++ b/tests/Aws/CloudSearch/RequestParameter/Query/Builder/LuceneQueryParametersBuilderTest.php
@@ -0,0 +1,115 @@
+build()->getQuery());
+    }
+
+    public function testItBuildsWithExactMatch(): void
+    {
+        $builder = LuceneQueryParametersBuilder::forRawQuery('test query ')
+            ->useExactMatchingForIndividualWords();
+
+        self::assertSame('("test" AND "query")', $builder->build()->getQuery());
+    }
+
+    public function testItBuildsWithExactMatchAndFields(): void
+    {
+        $builder = LuceneQueryParametersBuilder::forRawQuery('test query')
+            ->useExactMatchingForIndividualWords()
+            ->searchByFields('field1', 'field2');
+
+        self::assertSame(
+            '((field1: "test" OR field2: "test") AND (field1: "query" OR field2: "query"))',
+            $builder->build()->getQuery()
+        );
+    }
+
+    /**
+     * @dataProvider getQueryForWildcardSearch
+     */
+    public function testItBuildsWithWildcardMatch(string $searchQuery, string $expectedQuery): void
+    {
+        $builder = LuceneQueryParametersBuilder::forRawQuery($searchQuery)
+            ->useWildcardMatching();
+
+        self::assertSame($expectedQuery, $builder->build()->getQuery());
+    }
+
+    /**
+     * Yields [raw query, expected Lucene query] pairs for wildcard-only matching.
+     */
+    public static function getQueryForWildcardSearch(): \Generator
+    {
+        yield ['test', '*test*'];
+        yield ['test query', '(*test* AND *query*)'];
+        yield [
+            'https://landingi.example/test?query-string',
+            '*https* AND *landingi.example* AND *test* AND *query* AND *string*',
+        ];
+        yield [
+            '!~@#^$ \\',
+            '*@#* AND *$*',
+        ];
+    }
+
+    /**
+     * @dataProvider getQueryForWildcardSearchWithFields
+     */
+    public function testItBuildsWithWildcardMatchAndFields(string $searchQuery, string $expectedQuery): void
+    {
+        $builder = LuceneQueryParametersBuilder::forRawQuery($searchQuery)
+            ->useWildcardMatching()
+            ->searchByFields('field1', 'field2');
+
+        self::assertSame($expectedQuery, $builder->build()->getQuery());
+    }
+
+    /**
+     * Yields [raw query, expected Lucene query] pairs for wildcard matching on two fields.
+     */
+    public static function getQueryForWildcardSearchWithFields(): \Generator
+    {
+        yield ['test', '(field1: *test* OR field2: *test*)'];
+        yield ['test query', '((field1: *test* OR field2: *test*) AND (field1: *query* OR field2: *query*))'];
+        yield [
+            'https://landingi.example/test?query-string',
+            '(field1: (*https* AND *landingi.example* AND *test* AND *query* AND *string*) OR field2: (*https* AND *landingi.example* AND *test* AND *query* AND *string*))',
+        ];
+        yield [
+            '!~@#^$ \\',
+            '(field1: (*@#* AND *$*) OR field2: (*@#* AND *$*))',
+        ];
+    }
+
+    /**
+     * @dataProvider getQueryForWildcardAndExactSearchWithFields
+     */
+    public function testItBuildsWithWildcardAndExactMatchAndOneField(string $searchQuery, string $expectedQuery): void
+    {
+        $builder = LuceneQueryParametersBuilder::forRawQuery($searchQuery)
+            ->useWildcardMatching()
+            ->useExactMatchingForIndividualWords()
+            ->searchByFields('field1');
+
+        self::assertSame($expectedQuery, $builder->build()->getQuery());
+    }
+
+    /**
+     * Yields [raw query, expected Lucene query] pairs for combined exact + wildcard matching on one field.
+     */
+    public static function getQueryForWildcardAndExactSearchWithFields(): \Generator
+    {
+        yield ['test', '(field1: "test" OR field1: *test*)'];
+        yield ['test query', '((field1: "test" OR field1: *test*) AND (field1: "query" OR field1: *query*))'];
+        yield [
+            'https://landingi.example/test?query-string',
+            '(field1: "https://landingi.example/test?query-string" OR field1: (*https* AND *landingi.example* AND *test* AND *query* AND *string*))',
+        ];
+        yield [
+            '!~@#^$ \\',
+            '((field1: "!~@#^$" OR field1: (*@#* AND *$*)) AND field1: "\\\")',
+        ];
+    }
+}