Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add File Permissions to generated documents #567

Merged
merged 2 commits into from
Jul 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ public class Attributes {
static public final class FIELD_NAMES {
public static final String OWNER = "owner";
public static final String GROUP = "group";
public static final String PERMISSIONS = "permissions";
}

private String owner;
private String group;
private int permissions;

public String getOwner() {
return owner;
Expand All @@ -50,4 +52,12 @@ public String getGroup() {
public void setGroup(String group) {
this.group = group;
}

/**
 * Returns the permissions value stored on these attributes.
 *
 * @return the current permissions value; the numeric encoding is whatever the caller of
 *         {@link #setPermissions(int)} supplied
 */
public int getPermissions() {
    return this.permissions;
}

/**
 * Stores the permissions value on these attributes.
 *
 * @param permissions the permissions value to keep; it is stored unchanged
 */
public void setPermissions(final int permissions) {
    this.permissions = permissions;
}
}
39 changes: 21 additions & 18 deletions core/src/main/java/fr/pilato/elasticsearch/crawler/fs/FsParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -238,27 +238,27 @@ private void addFilesRecursively(FileAbstractor<?> path, String filepath, LocalD

if (children != null) {
for (FileAbstractModel child : children) {
String filename = child.name;
String filename = child.getName();

// https://github.com/dadoonet/fscrawler/issues/1 : Filter documents
boolean isIndexable = isIndexable(filename, fsSettings.getFs().getIncludes(), fsSettings.getFs().getExcludes());

// It can happen that we have a dir "foo" which does not match the include name like "*.txt"
// We need to go in it unless it has been explicitly excluded by the user
if (child.directory && !isExcluded(filename, fsSettings.getFs().getExcludes())) {
if (child.isDirectory() && !isExcluded(filename, fsSettings.getFs().getExcludes())) {
isIndexable = true;
}

logger.debug("[{}] can be indexed: [{}]", filename, isIndexable);
if (isIndexable) {
if (child.file) {
if (child.isFile()) {
logger.debug(" - file: {}", filename);
fsFiles.add(filename);
if (child.lastModifiedDate.isAfter(lastScanDate) ||
(child.creationDate != null && child.creationDate.isAfter(lastScanDate))) {
if (child.getLastModifiedDate().isAfter(lastScanDate) ||
(child.getCreationDate() != null && child.getCreationDate().isAfter(lastScanDate))) {
try {
indexFile(child, stats, filepath,
fsSettings.getFs().isIndexContent() || fsSettings.getFs().isStoreSource() ? path.getInputStream(child) : null, child.size);
fsSettings.getFs().isIndexContent() || fsSettings.getFs().isStoreSource() ? path.getInputStream(child) : null, child.getSize());
stats.addFile();
} catch (java.io.FileNotFoundException e) {
if (fsSettings.getFs().isContinueOnError()) {
Expand All @@ -269,18 +269,18 @@ private void addFilesRecursively(FileAbstractor<?> path, String filepath, LocalD
}
} else {
logger.debug(" - not modified: creation date {} , file date {}, last scan date {}",
child.creationDate, child.lastModifiedDate, lastScanDate);
child.getCreationDate(), child.getLastModifiedDate(), lastScanDate);
}
} else if (child.directory) {
} else if (child.isDirectory()) {
logger.debug(" - folder: {}", filename);
if (fsSettings.getFs().isIndexFolders()) {
fsFolders.add(child.fullpath);
indexDirectory(child.fullpath);
fsFolders.add(child.getFullpath());
indexDirectory(child.getFullpath());
}
addFilesRecursively(path, child.fullpath, lastScanDate);
addFilesRecursively(path, child.getFullpath(), lastScanDate);
} else {
logger.debug(" - other: {}", filename);
logger.debug("Not a file nor a dir. Skipping {}", child.fullpath);
logger.debug("Not a file nor a dir. Skipping {}", child.getFullpath());
}
} else {
logger.debug(" - ignored file/dir: {}", filename);
Expand Down Expand Up @@ -409,10 +409,10 @@ private Collection<String> getFolderDirectory(String path) throws Exception {
*/
private void indexFile(FileAbstractModel fileAbstractModel, ScanStatistic stats, String dirname, InputStream inputStream,
long filesize) throws Exception {
final String filename = fileAbstractModel.name;
final LocalDateTime lastmodified = fileAbstractModel.lastModifiedDate;
final String extension = fileAbstractModel.extension;
final long size = fileAbstractModel.size;
final String filename = fileAbstractModel.getName();
final LocalDateTime lastmodified = fileAbstractModel.getLastModifiedDate();
final String extension = fileAbstractModel.getExtension();
final long size = fileAbstractModel.getSize();

logger.debug("fetching content from [{}],[{}]", dirname, filename);

Expand Down Expand Up @@ -447,8 +447,11 @@ private void indexFile(FileAbstractModel fileAbstractModel, ScanStatistic stats,
// Attributes
if (fsSettings.getFs().isAttributesSupport()) {
doc.setAttributes(new Attributes());
doc.getAttributes().setOwner(fileAbstractModel.owner);
doc.getAttributes().setGroup(fileAbstractModel.group);
doc.getAttributes().setOwner(fileAbstractModel.getOwner());
doc.getAttributes().setGroup(fileAbstractModel.getGroup());
if (fileAbstractModel.getPermissions() >= 0) {
doc.getAttributes().setPermissions(fileAbstractModel.getPermissions());
}
}
// Attributes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,82 @@
import java.time.LocalDateTime;

public class FileAbstractModel {
public String name;
public boolean file;
public boolean directory;
public LocalDateTime lastModifiedDate;
public LocalDateTime creationDate;
public String path;
public String fullpath;
public long size;
public String owner;
public String group;
public String extension;
private String name;
private boolean file;
private boolean directory;
private LocalDateTime lastModifiedDate;
private LocalDateTime creationDate;
private String path;
private String fullpath;
private long size;
private String owner;
private String group;
private int permissions;
private String extension;

/**
 * Builds a model describing a single entry found by a file abstractor.
 *
 * <p>There is no separate directory argument: the directory flag is always stored as the
 * negation of {@code file}.
 *
 * @param name             name of the entry
 * @param file             {@code true} when the entry is a file; {@code false} marks it as a directory
 * @param lastModifiedDate last modification date of the entry
 * @param creationDate     creation date of the entry; may be {@code null} when the source cannot provide it
 * @param extension        file extension of the entry
 * @param path             path the entry was listed from
 * @param fullpath         full path of the entry itself
 * @param size             size of the entry
 * @param owner            owner of the entry
 * @param group            group of the entry
 * @param permissions      permissions value of the entry, stored unchanged
 */
public FileAbstractModel(String name, boolean file, LocalDateTime lastModifiedDate, LocalDateTime creationDate,
        String extension, String path, String fullpath, long size, String owner, String group, int permissions) {
    // Identity and location.
    this.name = name;
    this.extension = extension;
    this.path = path;
    this.fullpath = fullpath;

    // Entry kind: anything that is not a file is recorded as a directory.
    this.file = file;
    this.directory = !file;

    // Dates and size.
    this.lastModifiedDate = lastModifiedDate;
    this.creationDate = creationDate;
    this.size = size;

    // Ownership and access.
    this.owner = owner;
    this.group = group;
    this.permissions = permissions;
}

/**
 * Returns the name of this entry.
 *
 * @return the entry name
 */
public String getName() {
    return this.name;
}

/**
 * Tells whether this entry was flagged as a file.
 *
 * @return {@code true} when the entry is a file
 */
public boolean isFile() {
    return this.file;
}

/**
 * Tells whether this entry was flagged as a directory.
 *
 * @return {@code true} when the entry is a directory
 */
public boolean isDirectory() {
    return this.directory;
}

/**
 * Returns the last modification date of this entry.
 *
 * @return the last modification date
 */
public LocalDateTime getLastModifiedDate() {
    return this.lastModifiedDate;
}

/**
 * Returns the creation date of this entry.
 *
 * @return the creation date, or {@code null} when none was supplied
 */
public LocalDateTime getCreationDate() {
    return this.creationDate;
}

/**
 * Returns the path this entry was listed from.
 *
 * @return the path value held by this model
 */
public String getPath() {
    return this.path;
}

/**
 * Returns the full path of this entry.
 *
 * @return the full path value held by this model
 */
public String getFullpath() {
    return this.fullpath;
}

/**
 * Returns the size of this entry.
 *
 * @return the size value held by this model
 */
public long getSize() {
    return this.size;
}

/**
 * Returns the owner of this entry.
 *
 * @return the owner value held by this model
 */
public String getOwner() {
    return this.owner;
}

/**
 * Returns the group of this entry.
 *
 * @return the group value held by this model
 */
public String getGroup() {
    return this.group;
}

/**
 * Returns the permissions value of this entry.
 *
 * <p>NOTE(review): callers appear to treat a negative value as "not available" - confirm
 * against the code that produces this value.
 *
 * @return the permissions value held by this model
 */
public int getPermissions() {
    return this.permissions;
}

/**
 * Returns the file extension of this entry.
 *
 * @return the extension value held by this model
 */
public String getExtension() {
    return this.extension;
}

@Override
public String toString() {
Expand All @@ -64,6 +129,7 @@ public String toString() {
", path='" + path + '\'' +
", owner='" + owner + '\'' +
", group='" + group + '\'' +
", permissions=" + permissions +
", extension='" + extension + '\'' +
", fullpath='" + fullpath + '\'' +
", size=" + size +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

import static fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil.getCreationTime;
import static fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil.getFileExtension;
import static fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil.getFilePermissions;
import static fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil.getGroupName;
import static fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil.getOwnerName;

Expand All @@ -45,25 +46,23 @@ public FileAbstractorFile(FsSettings fsSettings) {

@Override
public FileAbstractModel toFileAbstractModel(String path, File file) {
FileAbstractModel model = new FileAbstractModel();
model.name = file.getName();
model.file = file.isFile();
model.directory = !model.file;
model.lastModifiedDate = LocalDateTime.ofInstant(Instant.ofEpochMilli(file.lastModified()), ZoneId.systemDefault());
model.creationDate = getCreationTime(file);
model.extension = getFileExtension(file);
model.path = path;
model.fullpath = file.getAbsolutePath();
model.size = file.length();
model.owner = getOwnerName(file);
model.group = getGroupName(file);

return model;
return new FileAbstractModel(
file.getName(),
file.isFile(),
LocalDateTime.ofInstant(Instant.ofEpochMilli(file.lastModified()), ZoneId.systemDefault()),
getCreationTime(file),
getFileExtension(file),
path,
file.getAbsolutePath(),
file.length(),
getOwnerName(file),
getGroupName(file),
getFilePermissions(file));
}

@Override
public InputStream getInputStream(FileAbstractModel file) throws Exception {
return new FileInputStream(new File(file.fullpath));
return new FileInputStream(new File(file.getFullpath()));
}

@Override
Expand Down Expand Up @@ -95,12 +94,12 @@ public boolean exists(String dir) {
}

@Override
public void open() throws Exception {
public void open() {
// Do nothing because we don't open resources in the File implementation.
}

@Override
public void close() throws Exception {
public void close() {
// Do nothing because we don't open resources in the File implementation.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor;
import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings;
import fr.pilato.elasticsearch.crawler.fs.settings.Server;
import org.apache.commons.io.FilenameUtils;

import java.io.InputStream;
import java.time.Instant;
Expand All @@ -47,23 +48,25 @@ public FileAbstractorSSH(FsSettings fsSettings) {

@Override
public FileAbstractModel toFileAbstractModel(String path, ChannelSftp.LsEntry file) {
FileAbstractModel model = new FileAbstractModel();
model.name = file.getFilename();
model.directory = file.getAttrs().isDir();
model.file = !model.directory;
// We are using here the local TimeZone as a reference. If the remote system is under another TZ, this might cause issues
model.lastModifiedDate = LocalDateTime.ofInstant(Instant.ofEpochMilli(file.getAttrs().getMTime()*1000L), ZoneId.systemDefault());
model.path = path;
model.fullpath = model.path.concat("/").concat(model.name);
model.size = file.getAttrs().getSize();
model.owner = Integer.toString(file.getAttrs().getUId());
model.group = Integer.toString(file.getAttrs().getGId());
return model;
// Convert the SFTP listing entry into the crawler's model.
// The second constructor argument of FileAbstractModel is the "is a file" flag (the model
// derives directory = !file), so the SFTP "is a directory" flag has to be negated here.
// Passing isDir() directly would report every remote directory as a file and vice versa.
return new FileAbstractModel(
        file.getFilename(),
        !file.getAttrs().isDir(),
        // We are using here the local TimeZone as a reference. If the remote system is under another TZ, this might cause issues
        // getMTime() is multiplied by 1000 to obtain the epoch milliseconds expected by Instant.ofEpochMilli
        LocalDateTime.ofInstant(Instant.ofEpochMilli(file.getAttrs().getMTime()*1000L), ZoneId.systemDefault()),
        // We don't have the creation date
        null,
        FilenameUtils.getExtension(file.getFilename()),
        path,
        // Full path is the listed directory joined with the entry name
        path.concat("/").concat(file.getFilename()),
        file.getAttrs().getSize(),
        // Owner and group are only available as numeric ids, so they are stored as strings
        Integer.toString(file.getAttrs().getUId()),
        Integer.toString(file.getAttrs().getGId()),
        file.getAttrs().getPermissions());
}

@Override
public InputStream getInputStream(FileAbstractModel file) throws Exception {
return sftp.get(file.fullpath);
return sftp.get(file.getFullpath());
}

@SuppressWarnings("unchecked")
Expand Down
Loading