Skip to content

Commit

Permalink
Fix: the regex word boundary ignores umlauts and other non-ASCII characters.
Browse files Browse the repository at this point in the history
  • Loading branch information
ksuess committed Mar 31, 2022
1 parent a7f5001 commit 2d213c5
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,18 @@ const TextWithGlossaryTooltips = ({ text }) => {
glossaryterms.forEach((term) => {
result = result.map((chunk) => {
if (chunk.type === 'text') {
let myre = `\\b${term.term}\\b`;
let regExpTerm = new RegExp(myre);
let splittedtext = chunk.val.split(regExpTerm).reverse();
var splittedtext;
// The regex word boundary (\b) treats umlauts and other non-ASCII letters as non-word characters, so it cannot anchor terms that start with them
if (['ä', 'ö', 'ü', 'Ä', 'Ö', 'Ü'].includes(term.term[0])) {
// let myre = `(?<!\w)${term.term}(?!\w)`;
let myre = `(?<=[ ,\.])${term.term}(?=[ ,\.])`;
let regExpTerm = new RegExp(myre, 'g');
splittedtext = chunk.val.split(regExpTerm).reverse();
} else {
let myre = `\\b${term.term}\\b`;
let regExpTerm = new RegExp(myre);
splittedtext = chunk.val.split(regExpTerm).reverse();
}
chunk = [{ type: 'text', val: splittedtext.pop() }];
while (splittedtext.length > 0) {
chunk.push({
Expand Down

0 comments on commit 2d213c5

Please sign in to comment.