From a6f8cc8e475b633b3287f6b5dde345e7e58eff9b Mon Sep 17 00:00:00 2001 From: Csillag Kristof Date: Fri, 10 May 2024 16:25:14 +0200 Subject: [PATCH] Tighter integration between text matching/trimming/highlighting When we trim a text around a pattern match, and later we try to highlight the match, pass the information along from the trimming phase to the highlighting phase. This avoids a round of unnecessary text matching, and fixes the corner case when the trimmed part is actually shorter than the pattern. --- .../HighlightedTrimmedText.tsx | 9 +---- src/app/components/HighlightedText/index.tsx | 38 ++++++++++++++----- .../HighlightedText/text-matching.ts | 32 ++++++++++++---- .../HighlightedText/text-trimming.ts | 31 +++++++++++---- 4 files changed, 79 insertions(+), 31 deletions(-) diff --git a/src/app/components/HighlightedText/HighlightedTrimmedText.tsx b/src/app/components/HighlightedText/HighlightedTrimmedText.tsx index f89ddba9d..6ac7dec06 100644 --- a/src/app/components/HighlightedText/HighlightedTrimmedText.tsx +++ b/src/app/components/HighlightedText/HighlightedTrimmedText.tsx @@ -33,11 +33,6 @@ type HighlightedTrimmedTextProps = { */ export const HighlightedTrimmedText: FC = props => { const { text, pattern, fragmentLength, options } = props - return ( - - ) + const { part, match } = trimAroundMatch(text, pattern, { fragmentLength }) + return } diff --git a/src/app/components/HighlightedText/index.tsx b/src/app/components/HighlightedText/index.tsx index f8e3a76fe..dbf0886dd 100644 --- a/src/app/components/HighlightedText/index.tsx +++ b/src/app/components/HighlightedText/index.tsx @@ -1,5 +1,5 @@ import * as React from 'react' -import { findTextMatch, NormalizerOptions } from './text-matching' +import { MatchInfo, findTextMatch, NO_MATCH, NormalizerOptions } from './text-matching' import { FC } from 'react' import { SxProps } from '@mui/material/styles' import Box from '@mui/material/Box' @@ -46,6 +46,14 @@ interface HighlightedTextProps { */ pattern: string | undefined + /** + * Instructions about which part to highlight. + * + * If not given, we will just search for the pattern. + * If given, this will be executed, and the pattern will not even be considered. + */ + part?: MatchInfo + /** * Options for highlighting (case sensitivity, styling, etc.) * @@ -57,19 +65,31 @@ interface HighlightedTextProps { /** * Display a text, with potential pattern matches highlighted with html MARKs */ -export const HighlightedText: FC = ({ text, pattern, options = defaultHighlight }) => { +export const HighlightedText: FC = ({ + text, + pattern, + part, + options = defaultHighlight, +}) => { const { sx = defaultHighlightStyle, findOptions = {} } = options - const match = findTextMatch(text, [pattern], findOptions) - return text === undefined ? undefined : match ? ( + // Have we been told what to highlight exactly? If not, look for the pattern + const task = part ?? findTextMatch(text, [pattern], findOptions) + + if (text === undefined) return undefined // Nothing to display + if (task === NO_MATCH) return text // We don't have to highlight anything + + const beginning = text.substring(0, task.startPos) + const focus = text.substring(task.startPos, task.endPos) + const end = text.substring(task.endPos) + + return ( <> - {text.substring(0, match.startPos)} + {beginning} - {text.substring(match.startPos, match.startPos + match.searchText.length)} + {focus} - {text.substring(match.startPos + match.searchText.length)} + {end} - ) : ( - text ) } diff --git a/src/app/components/HighlightedText/text-matching.ts b/src/app/components/HighlightedText/text-matching.ts index 78c98d1a8..b4924c7e6 100644 --- a/src/app/components/HighlightedText/text-matching.ts +++ b/src/app/components/HighlightedText/text-matching.ts @@ -4,11 +4,15 @@ export type { NormalizerOptions } from './text-normalization' /** * Store info about where did we found the pattern inside the corpus */ -export interface MatchInfo { - searchText: string +export interface PositiveMatchInfo { startPos: number + endPos: number } +export const NO_MATCH = 'NO_MATCH' + +export type MatchInfo = PositiveMatchInfo | typeof NO_MATCH + /** * Identify pattern matches within a corpus, also considering normalization * @@ -19,20 +23,32 @@ export const findTextMatch = ( rawCorpus: string | null | undefined, search: (string | undefined)[], options: NormalizerOptions = {}, -): MatchInfo | undefined => { +): MatchInfo => { const normalizedCorpus = normalizeTextForSearch(rawCorpus || '', options) - const matches: MatchInfo[] = search + const matches: PositiveMatchInfo[] = search .filter((s): s is string => !!s) .map(rawPattern => { const normalizedPattern = normalizeTextForSearch(rawPattern!, options) const matchStart = normalizedCorpus.indexOf(normalizedPattern) return matchStart !== -1 ? { - searchText: rawPattern, startPos: matchStart, + endPos: matchStart + rawPattern.length, } - : undefined + : 'NO_MATCH' }) - .filter((m): m is MatchInfo => !!m) - return matches[0] + .filter((m): m is PositiveMatchInfo => m !== NO_MATCH) + return matches[0] ?? NO_MATCH } + +/** + * Check if a pattern matches within a corpus, also considering normalization + * + * NOTE: depending on normalization options, the string length can change, + * and in that case, match position can be incorrect. + */ +export const hasTextMatch = ( + rawCorpus: string | null | undefined, + search: (string | undefined)[], + options: NormalizerOptions = {}, +): boolean => findTextMatch(rawCorpus, search, options) !== NO_MATCH diff --git a/src/app/components/HighlightedText/text-trimming.ts b/src/app/components/HighlightedText/text-trimming.ts index 5587f99b6..44a0c5074 100644 --- a/src/app/components/HighlightedText/text-trimming.ts +++ b/src/app/components/HighlightedText/text-trimming.ts @@ -1,4 +1,4 @@ -import { findTextMatch, NormalizerOptions } from './text-matching' +import { MatchInfo, findTextMatch, NO_MATCH, NormalizerOptions } from './text-matching' export interface TrimAroundOptions extends NormalizerOptions { /** @@ -29,31 +29,48 @@ export function trimAroundMatch( corpus: string | undefined, pattern: string | undefined, options: TrimAroundOptions = {}, -): string | undefined { +): { + part: string | undefined + match: MatchInfo +} { const { fragmentLength = 80, ...matchOptions } = options if (!corpus) { // there is nothing to see here - return undefined + return { part: undefined, match: NO_MATCH } } // do we have a match? - const match = pattern ? findTextMatch(corpus, [pattern], matchOptions) : undefined + const match = pattern ? findTextMatch(corpus, [pattern], matchOptions) : NO_MATCH if (corpus.length <= fragmentLength) { // the whole corpus fits into the max size, no need to cut. - return corpus + return { part: corpus, match } } // how much extra space do we have? const buffer = fragmentLength - (pattern || '').length - const matchStart = match?.startPos ?? 0 + const matchStart = match === NO_MATCH ? 0 : match.startPos // We will start before the start of the match, by buffer / 2 chars const startPos = Math.max(Math.min(matchStart - Math.floor(buffer / 2), corpus.length - fragmentLength), 0) const endPos = Math.min(startPos + fragmentLength, corpus.length) + // Do the trimming + const prefix = startPos ? '…' : '' + const postFix = endPos < corpus.length - 1 ? '…' : '' + const part = prefix + corpus.substring(startPos, endPos) + postFix + // compile the result - return (startPos ? '…' : '') + corpus.substring(startPos, endPos) + (endPos < corpus.length - 1 ? '…' : '') + return { + part, + match: + match === NO_MATCH + ? NO_MATCH + : { + startPos: Math.max(0, matchStart - startPos + prefix.length), + endPos: Math.min(part.length, match.endPos - startPos + prefix.length), + }, + } }