Skip to content

Commit

Permalink
Add error handling for timeaware finder to handle scenarios where fil… (
Browse files Browse the repository at this point in the history
apache#3537)

* Add error handling for timeaware finder to handle scenarios where files do not exist or folders do not match the date format

* Check path exists before attempting ls
  • Loading branch information
Will-Lo authored and jack-moseley committed Aug 24, 2022
1 parent 711408a commit 2e6bda4
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter f
return FileListUtils
.listFilesToCopyAtPath(fs, path, fileFilter, applyFilterToDirectories, includeEmptyDirectories);
} catch (IOException e) {
log.info(String.format("Could not find any files on target path due to %s. Returning an empty list of files.", e.getClass().getCanonicalName()));
log.warn(String.format("Could not find any files on fs %s path %s due to the following exception. Returning an empty list of files.", fs.getUri(), path), e);
return Lists.newArrayList();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,16 @@ protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter f
private List<FileStatus> recursivelyGetFilesAtDatePath(FileSystem fs, Path path, String traversedDatePath, PathFilter fileFilter,
int level, LocalDateTime startDate, LocalDateTime endDate, DateTimeFormatter formatter) throws IOException {
List<FileStatus> fileStatuses = Lists.newArrayList();
Iterator<FileStatus> folderIterator = Arrays.asList(fs.listStatus(path)).iterator();

if (!fs.exists(path)) {
return fileStatuses;
}
Iterator<FileStatus> folderIterator;
try {
folderIterator = Arrays.asList(fs.listStatus(path)).iterator();
} catch (IOException e) {
log.warn(String.format("Error while listing paths at %s due to ", path), e);
return fileStatuses;
}
// Check if at the lowest level/granularity of the date folder
if (this.datePattern.split(FileSystems.getDefault().getSeparator()).length == level) {
// Truncate the start date to the most granular unit of time in the datepattern
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.gobblin.data.management.copy;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
Expand Down Expand Up @@ -173,6 +174,10 @@ public void testGetFilesAtPath() throws IOException {
candidateFiles.add(filePath.toString());
}
}
// Edge case: test that files that do not match the date format but are within the folders searched by the timeaware finder are ignored
File f = new File(baseDir2.toString() + "/metadata.test");

f.createNewFile();

properties = new Properties();
properties.setProperty(TimeAwareRecursiveCopyableDataset.LOOKBACK_TIME_KEY, NUM_LOOKBACK_DAYS_STR);
Expand Down

0 comments on commit 2e6bda4

Please sign in to comment.