Skip to content

Commit

Permalink
Improve logging messages in LuceneDocumentsReader
Browse files Browse the repository at this point in the history
- Lower logging level when there are no usable reference identifiers
- Add the directory path to logging messages for troubleshooting
- Rename docId -> docSegId since it represents the position of the
  document, as opposed to the other identifiers, to improve readability

Signed-off-by: Peter Nied <[email protected]>
  • Loading branch information
peternied committed Dec 9, 2024
1 parent a71f2a5 commit 4871b3c
Showing 1 changed file with 18 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,17 @@ protected DirectoryReader wrapReader(DirectoryReader reader, boolean softDeletes
return reader;
}

protected RfsLuceneDocument getDocument(IndexReader reader, int docId, boolean isLive) {
protected RfsLuceneDocument getDocument(IndexReader reader, int docSegId, boolean isLive) {
Document document;
try {
document = reader.document(docId);
document = reader.document(docSegId);

Check failure on line 167 in RFS/src/main/java/org/opensearch/migrations/bulkload/common/LuceneDocumentsReader.java

View workflow job for this annotation

GitHub Actions / Run SonarQube Analysis

java:S1874

Remove this use of "document"; it is deprecated.
} catch (IOException e) {
log.atError().setCause(e).setMessage("Failed to read document at Lucene index location {}")
.addArgument(docId).log();
log.atError()
.setCause(e)
.setMessage("Failed to read document segment id {} from source {}")
.addArgument(docSegId)
.addArgument(indexDirectoryPath)
.log();
return null;
}

Expand Down Expand Up @@ -207,21 +211,21 @@ protected RfsLuceneDocument getDocument(IndexReader reader, int docId, boolean i
}
}
if (id == null) {
log.atWarn().setMessage("Document with index {}, doc id: {} does not have an referenceable id. Skipping")
.addArgument(reader.getContext().id().toString())
.addArgument(docId)
log.atWarn().setMessage("Skipping document segment id {} from source {}, it does not have an referenceable id.")
.addArgument(docSegId)
.addArgument(indexDirectoryPath)
.log();
return null; // Skip documents with missing id
}

if (sourceBytes == null || sourceBytes.bytes.length == 0) {
log.atWarn().setMessage("Document {} doesn't have the _source field enabled")
log.atWarn().setMessage("Skipping document segment id {} document id {} from source {}, it doesn't have the _source field enabled.")
.addArgument(docSegId)
.addArgument(id)
.addArgument(indexDirectoryPath)
.log();
return null; // Skip these
}

log.atDebug().setMessage("Reading document {}").addArgument(id).log();
} catch (RuntimeException e) {
StringBuilder errorMessage = new StringBuilder();
errorMessage.append("Unable to parse Document id from Document. The Document's Fields: ");
Expand All @@ -235,7 +239,10 @@ protected RfsLuceneDocument getDocument(IndexReader reader, int docId, boolean i
return null; // Skip these
}

log.atDebug().setMessage("Document {} read successfully").addArgument(id).log();
log.atDebug().setMessage("Document id {} from source {} read successfully.")
.addArgument(id)
.addArgument(indexDirectoryPath)
.log();
return new RfsLuceneDocument(id, type, sourceBytes.utf8ToString(), routing);
}
}

0 comments on commit 4871b3c

Please sign in to comment.