Skip to content

Commit

Permalink
Merge pull request #62 from NatLibFi/simplye-345/scandinavian-sorting…
Browse files Browse the repository at this point in the history
…-in-search

Adjust search sort field analyzers to work with scandinavian letters
  • Loading branch information
ttuovinen authored Apr 25, 2024
2 parents 9430f97 + 2b8ab5c commit 6aea2ae
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
8 changes: 8 additions & 0 deletions core/search/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ def sort_author_keyword() -> SearchMappingFieldTypeParameterized:
return t


# Finland: added for correct sorting of Scandinavian letters in titles
def sort_title_keyword() -> SearchMappingFieldTypeParameterized:
    """Build the field type for a title field that is sorted via an analyzer.

    Returns:
        A parameterized "text" field type whose ``analyzer`` parameter is
        "sort_title_analyzer" and whose ``fielddata`` parameter is "true".
    """
    field_type = SearchMappingFieldTypeParameterized("text")
    # NOTE(review): fielddata is presumably enabled so that the analyzed
    # text field can be used for sorting -- confirm against the index docs.
    settings = (
        ("analyzer", "sort_title_analyzer"),
        ("fielddata", "true"),
    )
    for name, value in settings:
        field_type.parameters[name] = value
    return field_type


class SearchMappingFieldTypeObject(SearchMappingFieldType):
"""See: https://opensearch.org/docs/latest/field-types/supported-field-types/object/"""

Expand Down
17 changes: 12 additions & 5 deletions core/search/v5.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
LONG,
SearchMappingDocument,
SearchMappingFieldType,
icu_collation_keyword,
keyword,
nested,
sort_author_keyword,
sort_title_keyword,
)
from core.search.revision import SearchSchemaRevision

Expand Down Expand Up @@ -208,8 +208,9 @@ def __init__(self):
# Here's a special filter used only by that analyzer. It
# duplicates the filter used by the icu_collation_keyword data
# type.
self._filters["en_sortable_filter"] = dict(
type="icu_collation", language="en", country="US"
# Finland: change language to fi to correctly handle Scandinavian letters
self._filters["sortable_filter"] = dict(
type="icu_collation", language="fi", country="FI"
)

# Here's the analyzer used by the 'sort_author' property.
Expand All @@ -221,10 +222,16 @@ def __init__(self):
# fields can't specify char_filter.
self._analyzers["en_sort_author_analyzer"] = dict(
tokenizer="keyword",
filter=["en_sortable_filter"],
filter=["sortable_filter"],
char_filter=self.AUTHOR_CHAR_FILTER_NAMES,
)

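# Finland: analyzer named "sort_title_analyzer" (referenced by
# sort_title_keyword). It keeps the whole value as a single keyword
# token and applies the same collation filter as the author analyzer.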
self._analyzers["sort_title_analyzer"] = dict(
tokenizer="keyword",
filter=["sortable_filter"],
)

self._fields: dict[str, SearchMappingFieldType] = {
"summary": BASIC_TEXT,
"title": FILTERABLE_TEXT,
Expand All @@ -235,7 +242,7 @@ def __init__(self):
"publisher": FILTERABLE_TEXT,
"imprint": FILTERABLE_TEXT,
"presentation_ready": BOOLEAN,
"sort_title": icu_collation_keyword(),
"sort_title": sort_title_keyword(),
"sort_author": sort_author_keyword(),
"series_position": INTEGER,
"work_id": INTEGER,
Expand Down

0 comments on commit 6aea2ae

Please sign in to comment.