Skip to content

Commit

Permalink
Merge pull request #675 from dadoonet/fix/674-warn-error-tika
Browse files Browse the repository at this point in the history
Warn in case of Tika error
  • Loading branch information
dadoonet authored Feb 1, 2019
2 parents c48f908 + 95b37e0 commit feea255
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ public static void generate(FsSettings fsSettings, InputStream inputStream, Stri
parsedContent = extractText(fsSettings, indexedChars, inputStream, metadata);
logger.trace("End of Tika extraction");
} catch (Throwable e) {
// Build a message from embedded errors
Throwable current = e;
StringBuilder sb = new StringBuilder();
while (current != null) {
sb.append(" -> ");
sb.append(current.getMessage());
current = current.getCause();
}

logger.warn("Failed to extract [" + indexedChars + "] characters of text for [" + filename + "] {}", sb.toString());
logger.debug("Failed to extract [" + indexedChars + "] characters of text for [" + filename + "]", e);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,17 @@ public void testShiftJisEncoding() throws IOException {
assertThat(doc.getContent(), not(isEmptyOrNullString()));
}

/**
 * Runs the extraction on the protected sample document and verifies the
 * content type recorded on the resulting document's file metadata.
 *
 * @throws IOException propagated from {@code extractFromFile}
 */
@Test
public void testProtectedDocument() throws IOException {
    final String protectedSample = "test-protected.docx";
    final String expectedContentType = "application/x-tika-ooxml-protected";

    // Default settings, named after the currently running test.
    final FsSettings settings = FsSettings.builder(getCurrentTestName()).build();
    final Doc extracted = extractFromFile(protectedSample, settings);

    assertThat(extracted.getFile().getContentType(), is(expectedContentType));
}

/**
 * Extracts the sample document named {@code test.<extension>}.
 *
 * @param extension file extension (without the leading dot) appended to {@code "test."}
 * @return the document produced by {@code extractFromFile} for that sample file
 * @throws IOException propagated from {@code extractFromFile}
 */
private Doc extractFromFileExtension(String extension) throws IOException {
    final String sampleName = "test." + extension;
    logger.info("Test extraction of [{}] file", extension);
    return extractFromFile(sampleName);
}

Expand All @@ -707,6 +716,7 @@ private Doc extractFromFile(String filename) throws IOException {
}

private Doc extractFromFile(String filename, FsSettings fsSettings) throws IOException {
logger.info("Test extraction of [{}]", filename);
InputStream data = getBinaryContent(filename);
Doc doc = new Doc();
MessageDigest messageDigest = null;
Expand Down

0 comments on commit feea255

Please sign in to comment.