From 2d585b8306f61dbdf7df148f952378ccada866a8 Mon Sep 17 00:00:00 2001
From: tallison
Date: Tue, 26 Mar 2024 09:10:01 -0400
Subject: [PATCH] TIKA-4219 -- clean up...do not include font names in main package

---
 .../main/java/org/apache/tika/parser/epub/EpubParser.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index a572ad2cca..b9f74cf3e8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -452,9 +452,13 @@ private void handleEmbedded(ZipFile zipFile, String relativePath, HRefMediaPair
 
         xhtml.startElement("div", "class", "embedded");
         try {
+            boolean outputHtml = true;
+            if (hRefMediaPair.media.contains("font") || hRefMediaPair.href.startsWith("fonts")) {
+                outputHtml = false;
+            }
             embeddedDocumentExtractor
                     .parseEmbedded(stream, new EmbeddedContentHandler(xhtml), embeddedMetadata,
-                            true);
+                            outputHtml);
         } finally {
             IOUtils.closeQuietly(stream);