Skip to content

Commit

Permalink
Merge pull request #32 from thushan/ignore-system-files
Browse files Browse the repository at this point in the history
Adds ignoring of system files and folders.
  • Loading branch information
thushan authored Nov 27, 2023
2 parents aee7a6d + f45a8fa commit e586c89
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 32 deletions.
1 change: 1 addition & 0 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func init() {
flags.IntVarP(&af.UpdateSeconds, "update-seconds", "", 5, "Update progress every x seconds.")
flags.BoolVarP(&af.DisableSlicing, "disable-slicing", "", false, "Disable slicing (hashes full file).")
flags.BoolVarP(&af.IgnoreEmptyFiles, "ignore-emptyfiles", "", false, "Ignore & don't report on empty/zero byte files.")
flags.BoolVarP(&af.IgnoreHiddenItems, "ignore-hiddenitems", "", true, "Ignore hidden files & folders (ones that start with '.')")
flags.StringVarP(&af.OutputFile, "output-file", "o", "", "Export as JSON")
flags.BoolVarP(&af.Silent, "silent", "q", false, "Run in silent mode.")
flags.BoolVarP(&af.Verbose, "verbose", "", false, "Run in verbose mode.")
Expand Down
2 changes: 1 addition & 1 deletion internal/smash/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func (app *App) Run() error {
}

sl := slicer.New(algorithms.Algorithm(app.Flags.Algorithm))
wk := indexer.NewConfigured(app.Flags.ExcludeDir, app.Flags.ExcludeFile)
wk := indexer.NewConfigured(app.Flags.ExcludeDir, app.Flags.ExcludeFile, app.Flags.IgnoreHiddenItems)
slo := slicer.SlicerOptions{
DisableSlicing: app.Flags.DisableSlicing,
DisableMeta: false, // TODO: Flag this
Expand Down
2 changes: 1 addition & 1 deletion internal/smash/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (app *App) printConfiguration() {
theme.Println(b.Sprint("Locations: "), theme.ColourConfig(strings.Join(app.Locations, ", ")))

if len(f.ExcludeDir) > 0 || len(f.ExcludeFile) > 0 {
theme.StyleBold.Println(b.Sprint("Excluded"))
theme.StyleBold.Println("Excluded")
if len(f.ExcludeDir) > 0 {
theme.Println(b.Sprint(" Dirs: "), theme.ColourConfigA(strings.Join(f.ExcludeDir, ", ")))
}
Expand Down
29 changes: 15 additions & 14 deletions internal/smash/flags.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
package smash

// Flags groups the user-configurable options of a smash run. Every field
// carries a yaml tag naming its key. IgnoreHiddenItems is the option added
// next to IgnoreEmptyFiles; the CLI binds it to --ignore-hiddenitems with a
// default of true and it is handed to the indexer when the app runs.
type Flags struct {
OutputFile string `yaml:"output"`
Base []string `yaml:"base"`
ExcludeDir []string `yaml:"exclude-dir"`
ExcludeFile []string `yaml:"exclude-file"`
Algorithm int `yaml:"algorithm"`
MaxThreads int `yaml:"max-threads"`
MaxWorkers int `yaml:"max-workers"`
UpdateSeconds int `yaml:"update-seconds"`
DisableSlicing bool `yaml:"disable-slicing"`
IgnoreEmptyFiles bool `yaml:"ignore-emptyfiles"`
ShowVersion bool `yaml:"show-version"`
Silent bool `yaml:"silent"`
NoProgress bool `yaml:"no-progress"`
Verbose bool `yaml:"verbose"`
OutputFile string `yaml:"output"`
Base []string `yaml:"base"`
ExcludeDir []string `yaml:"exclude-dir"`
ExcludeFile []string `yaml:"exclude-file"`
Algorithm int `yaml:"algorithm"`
MaxThreads int `yaml:"max-threads"`
MaxWorkers int `yaml:"max-workers"`
UpdateSeconds int `yaml:"update-seconds"`
DisableSlicing bool `yaml:"disable-slicing"`
IgnoreEmptyFiles bool `yaml:"ignore-emptyfiles"`
IgnoreHiddenItems bool `yaml:"ignore-hiddenitems"`
ShowVersion bool `yaml:"show-version"`
Silent bool `yaml:"silent"`
NoProgress bool `yaml:"no-progress"`
Verbose bool `yaml:"verbose"`
}
42 changes: 34 additions & 8 deletions pkg/indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,34 @@ type IndexerConfig struct {
dirMatcher *regexp.Regexp
fileMatcher *regexp.Regexp

excludeSysFilter []string
excludeSysFileFilter []string
excludeSysDirFilter []string

ExcludeDirFilter []string
ExcludeFileFilter []string

IgnoreHiddenItems bool
}

// New returns an IndexerConfig with default settings: hidden items are
// ignored, no user-supplied directory or file exclusions are set (so both
// matchers are nil), and the built-in lists of well-known system folders and
// system files are populated. The system file names are stored in lower case.
func New() *IndexerConfig {
return &IndexerConfig{
IgnoreHiddenItems: true,
ExcludeFileFilter: nil,
ExcludeDirFilter: nil,
dirMatcher: nil,
fileMatcher: nil,
excludeSysFilter: []string{
excludeSysDirFilter: []string{
"System Volume Information", "$RECYCLE.BIN", "$MFT", /* Windows */
".Trash", ".Trash-1000", /* Linux */
".Trashes", /* macOS */
},
excludeSysFileFilter: []string{
"thumbs.db", "desktop.ini", /* Windows */
".ds_store", /* macOS */
},
}
}
func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *IndexerConfig {
func NewConfigured(excludeDirFilter []string, excludeFileFilter []string, ignoreHiddenItems bool) *IndexerConfig {
indexer := New()
if len(excludeFileFilter) > 0 {
indexer.ExcludeFileFilter = excludeFileFilter
Expand All @@ -46,6 +55,7 @@ func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *Index
indexer.ExcludeDirFilter = excludeDirFilter
indexer.dirMatcher = regexp.MustCompile(strings.Join(excludeDirFilter, "|"))
}
indexer.IgnoreHiddenItems = ignoreHiddenItems
return indexer
}

Expand All @@ -58,14 +68,27 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File
return err
}
name := filepath.Clean(d.Name())

isSystemObj := config.IgnoreHiddenItems && config.isHidden(name)

if d.IsDir() {
if config.isSystemFolder(name) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) {

isIgnoreDir := config.isIgnored(name, config.excludeSysDirFilter)
isExludeDir := len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)

if isSystemObj || isIgnoreDir || isExludeDir {
return fs.SkipDir
}

} else {
if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(name) {

isIgnoreFile := config.isIgnored(name, config.excludeSysFileFilter)
isExludeFile := len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(name)

if isSystemObj || isIgnoreFile || isExludeFile {
return nil
}

files <- FileFS{
FileSystem: f,
Path: path,
Expand All @@ -78,11 +101,14 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File
return walkErr
}

func (config *IndexerConfig) isSystemFolder(folder string) bool {
for _, v := range config.excludeSysFilter {
if folder == v {
// isIgnored reports whether item equals any entry of collection, comparing
// the names case-insensitively. An empty or nil collection never matches.
func (config *IndexerConfig) isIgnored(item string, collection []string) bool {
	for idx := 0; idx < len(collection); idx++ {
		if !strings.EqualFold(collection[idx], item) {
			continue
		}
		return true
	}
	return false
}
// isHidden reports whether name is dot-prefixed and longer than a single
// character, so a bare "." is not treated as hidden.
func (config *IndexerConfig) isHidden(name string) bool {
	if len(name) <= 1 {
		return false
	}
	return name[0] == '.'
}
102 changes: 94 additions & 8 deletions pkg/indexer/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ func TestIndexDirectoryWithFilesInRoot(t *testing.T) {
"DSC19842.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -35,7 +35,7 @@ func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -62,7 +62,7 @@ func TestIndexDirectoryWithDirExclusions(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -90,7 +90,7 @@ func TestIndexDirectoryWithFileExclusions(t *testing.T) {
"exclude.me",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -120,7 +120,7 @@ func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) {
"exclude-dir/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand All @@ -138,6 +138,92 @@ func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) {
}
}

// TestIndexDirectoryWithHiddenFilesThatShouldBeIndexed walks a mock tree with
// hidden-item filtering switched off and expects the dot-prefixed file and
// folder contents to be reported together with the regular files.
func TestIndexDirectoryWithHiddenFilesThatShouldBeIndexed(t *testing.T) {
	mockFiles := []string{
		"DSC19841.ARW",
		"DSC19842.ARW",
		".tmux",
		".config/smash/config.json",
	}

	// No user exclusions, and ignoreHiddenItems is false.
	actual := walkDirectoryTestRunner(mockFiles, []string{}, []string{}, false, t)

	// The dot-prefixed entries are expected ahead of the regular files.
	expected := []string{mockFiles[3], mockFiles[2], mockFiles[0], mockFiles[1]}

	if got, want := len(actual), len(expected); got != want {
		t.Errorf("expected %d, got %d files", want, got)
	}
	if !reflect.DeepEqual(actual, expected) {
		t.Errorf("expected %v, got %v files", expected, actual)
	}
}

// TestIndexDirectoryWithHiddenFiles walks a mock tree with hidden-item
// filtering switched on and expects only the non-hidden files to be reported.
func TestIndexDirectoryWithHiddenFiles(t *testing.T) {
	mockFiles := []string{
		"DSC19841.ARW",
		"DSC19842.ARW",
		".tmux",
		".config/smash/config.json",
	}
	dirExclusions := []string{"exclude-dir"}
	fileExclusions := []string{"exclude.me"}

	// ignoreHiddenItems is true, so ".tmux" and everything under ".config" must be dropped.
	actual := walkDirectoryTestRunner(mockFiles, dirExclusions, fileExclusions, true, t)

	expected := []string{mockFiles[0], mockFiles[1]}

	if got, want := len(actual), len(expected); got != want {
		t.Errorf("expected %d, got %d files", want, got)
	}
	if !reflect.DeepEqual(actual, expected) {
		t.Errorf("expected %v, got %v files", expected, actual)
	}
}
// TestIndexDirectoryWhichContainsSystemFiles walks a mock tree holding
// well-known system files (in differing letter case) and expects only the
// ordinary file to be reported.
func TestIndexDirectoryWhichContainsSystemFiles(t *testing.T) {
	mockFiles := []string{
		"DSC19841.ARW",
		"THUMBS.DB",
		"desktop.ini",
	}

	// No user exclusions are supplied; ignoreHiddenItems is true.
	actual := walkDirectoryTestRunner(mockFiles, []string{}, []string{}, true, t)

	expected := []string{mockFiles[0]}

	if got, want := len(actual), len(expected); got != want {
		t.Errorf("expected %d, got %d files", want, got)
	}
	if !reflect.DeepEqual(actual, expected) {
		t.Errorf("expected %v, got %v files", expected, actual)
	}
}
func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) {
exclude_dir := []string{}
exclude_file := []string{}
Expand All @@ -149,7 +235,7 @@ func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) {
"$MFT/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand All @@ -174,14 +260,14 @@ func channelFileToSliceOfFiles(ch <-chan FileFS) []string {
return result
}

func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, t *testing.T) []string {
func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, ignoreHiddenItems bool, t *testing.T) []string {
fr := "mock://"
fs := createMockFS(files)
ch := make(chan FileFS)

go func() {
defer close(ch)
indexer := NewConfigured(excludeDir, excludeFiles)
indexer := NewConfigured(excludeDir, excludeFiles, ignoreHiddenItems)
err := indexer.WalkDirectory(fs, fr, ch)
if err != nil {
t.Errorf("WalkDirectory returned an error: %v", err)
Expand Down
5 changes: 5 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ $ go install github.com/thushan/smash@latest
Usage:
smash [flags] [locations-to-smash]
Usage:
smash [flags] [locations-to-smash]
Flags:
--algorithm algorithm Algorithm to use to hash files. Supported: xxhash, murmur3, md5, sha512, sha256 (full list, see readme) (default xxhash)
--base strings Base directories to use for comparison. Eg. --base=/c/dos,/c/dos/run/,/run/dos/run
Expand All @@ -57,6 +60,7 @@ Flags:
--exclude-file strings Files to exclude separated by comma. Eg. --exclude-file=.gitignore,*.csv
-h, --help help for smash
--ignore-emptyfiles Ignore & don't report on empty/zero byte files.
--ignore-hiddenitems Ignore hidden files & folders (ones that start with '.') (default true)
-p, --max-threads int Maximum threads to utilise. (default 16)
-w, --max-workers int Maximum workers to utilise when smashing. (default 8)
--no-progress Disable progress updates.
Expand All @@ -66,6 +70,7 @@ Flags:
--verbose Run in verbose mode.
-v, --version Show version information.
```

See the [full list of algorithms](./docs/algorithms.md) supported.
Expand Down

0 comments on commit e586c89

Please sign in to comment.