diff --git a/components/language-chooser/common/find-language/searchForLanguage.ts b/components/language-chooser/common/find-language/searchForLanguage.ts index d541fc7..5f066e2 100644 --- a/components/language-chooser/common/find-language/searchForLanguage.ts +++ b/components/language-chooser/common/find-language/searchForLanguage.ts @@ -62,39 +62,36 @@ export function searchForLanguage( // e.g. if querystring is "otl", then " otl" is a prefix match for " San Felipe Otlaltepec Popoloca " but not "botlikh" const prefixMatchResults = exactMatchFuse.search(" " + queryString); - const fuzzyMatchFuse = new Fuse(spacePaddedLanguages as ILanguage[], { + const fuzzyMatchFuse = new Fuse(languages as ILanguage[], { ...baseFuseOptions, threshold: 0.3, }); const fuzzyMatchResults = fuzzyMatchFuse.search(queryString); - // Combine all the result lists with no duplicates, prioritizing whole word exact matches then prefix exact matches then all other fuzzy matches - const results = []; - const alreadyIncludedResultCodes = new Set(); + // Use the results from the fuzzy match search, since the others will have incorrect match indices due to the space padding. 
+ // But order the results in order of whole word matches, then prefix matches, then the rest with no duplicates + const resultsByIso639_3Code = new Map>(); + for (const result of fuzzyMatchResults) { + resultsByIso639_3Code.set(result.item.iso639_3_code, result); + } + const orderedResults = []; for (const resultList of [ wholeWordMatchResults, prefixMatchResults, fuzzyMatchResults, ]) { - for (const result of resultList) { - if (!alreadyIncludedResultCodes.has(result.item.iso639_3_code)) { - results.push(result); - alreadyIncludedResultCodes.add(result.item.iso639_3_code); + for (const r of resultList) { + const isoCode = r.item.iso639_3_code; + const correctResult = resultsByIso639_3Code.get(isoCode); + if (correctResult) { + // this language was not already added as part of a previous subset + // (wholeWordMatchResults should be a subset of prefixMatchResults which should be a subset of fuzzyMatchResults) + orderedResults.push(correctResult); + resultsByIso639_3Code.delete(isoCode); } } } - - return results.map((r) => ({ - ...r, - // We trim off the spaces that we added above to find exact and prefix matches. - item: { - ...r.item, - autonym: r.item.autonym ? 
r.item.autonym.trim() : undefined, - exonym: r.item.exonym.trim(), - names: r.item.names.map((n) => n.trim()), - languageSubtag: r.item.languageSubtag.trim(), - }, - })); + return orderedResults; } // get language (not macrolanguage) with exact match on subtag diff --git a/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.spec.ts b/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.spec.ts index e3ecc21..dc8c211 100644 --- a/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.spec.ts +++ b/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.spec.ts @@ -1,5 +1,8 @@ import { expect, it, describe } from "vitest"; -import { parseLangtagFromLangChooser } from "./languageTagHandling"; +import { + defaultRegionForLangTag, + parseLangtagFromLangChooser, +} from "./languageTagHandling"; import { getRegionBySubtag } from "@ethnolib/find-language"; describe("Tag parsing", () => { it("should find a language by 2 letter language subtag", () => { @@ -133,3 +136,24 @@ describe("Tag parsing", () => { ); expect(ssh_Arab_AE_x_foobar_result?.customDetails?.dialect).toEqual("foobar"); }); + +describe("defaultRegionForLangTag", () => { + it("should return the region for a language tag that already has a region", () => { + expect(defaultRegionForLangTag("en-Latn-US")?.name).toEqual( + "United States of America" + ); + expect(defaultRegionForLangTag("en-CN-x-foobar")?.name).toEqual("China"); + expect(defaultRegionForLangTag("en-JP")?.name).toEqual("Japan"); + }); + it("should return the region for the closest maximal equivalent of the language tag", () => { + expect(defaultRegionForLangTag("uz")?.name).toEqual("Uzbekistan"); + expect(defaultRegionForLangTag("uz-Cyrl")?.name).toEqual("Uzbekistan"); + expect(defaultRegionForLangTag("uz-Arab")?.name).toEqual("Afghanistan"); + expect(defaultRegionForLangTag("uz-Arab-x-foobar")?.name).toEqual( 
+      "Afghanistan"
+    );
+    expect(defaultRegionForLangTag("uz-Taml-x-foobar")?.name).toEqual(
+      "Uzbekistan"
+    );
+  });
+});
diff --git a/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.ts b/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.ts
index bc9d9dd..d741fd1 100644
--- a/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.ts
+++ b/components/language-chooser/react/common/language-chooser-react-hook/languageTagHandling.ts
@@ -118,3 +118,45 @@ export function parseLangtagFromLangChooser(
     } as ICustomizableLanguageDetails,
   } as IOrthography;
 }
+
+/**
+ * Finds the region to associate with a language tag by default.
+ *
+ * If the tag itself names a region, that region wins. Otherwise the region is
+ * read from the most specific maximal equivalent tag that can be found: first
+ * for the full tag, then for its language + script, then for the language alone.
+ *
+ * @param languageTag - the language tag to resolve, e.g. "uz-Arab-x-foobar"
+ * @returns the region parsed from the tag or from its maximal equivalent, or
+ * undefined if neither yields one
+ */
+export function defaultRegionForLangTag(languageTag: string) {
+  // if languageTag already has a region tag in it, use that
+  const orthography = parseLangtagFromLangChooser(languageTag);
+  const explicitRegion = orthography?.customDetails?.region;
+  if (explicitRegion) {
+    return explicitRegion;
+  }
+
+  // Otherwise, the maximal equivalent language tag will have the region code.
+  // Only build fallback tags from subtags that were actually parsed, so we never
+  // look up placeholder strings such as "undefined-undefined".
+  const languageSubtag = orthography?.language?.languageSubtag;
+  const scriptSubtag = orthography?.script?.code;
+  const candidateTags = [languageTag];
+  if (languageSubtag) {
+    if (scriptSubtag) {
+      candidateTags.push(`${languageSubtag}-${scriptSubtag}`);
+    }
+    candidateTags.push(languageSubtag);
+  }
+
+  // Take the most specific/relevant matching maximal tag that we are able to find
+  for (const candidateTag of candidateTags) {
+    const maximalTag = getMaximalLangtag(candidateTag);
+    if (maximalTag) {
+      return parseLangtagFromLangChooser(maximalTag)?.customDetails?.region;
+    }
+  }
+  return undefined;
+}
diff --git a/components/language-chooser/react/language-chooser-react-mui/src/index.ts b/components/language-chooser/react/language-chooser-react-mui/src/index.ts
index daff475..3f22eb0 100644
--- a/components/language-chooser/react/language-chooser-react-mui/src/index.ts
+++ b/components/language-chooser/react/language-chooser-react-mui/src/index.ts
@@ -9,9 +9,10 @@ export {
   isUnlistedLanguage,
   createTagFromOrthography,
   parseLangtagFromLangChooser,
+
defaultDisplayName, + defaultRegionForLangTag, } from "@ethnolib/language-chooser-react-hook"; export type { IOrthography, ICustomizableLanguageDetails, - defaultDisplayName, } from "@ethnolib/language-chooser-react-hook";