Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds ignoring of system files and folders. #32

Merged
merged 1 commit into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func init() {
flags.IntVarP(&af.UpdateSeconds, "update-seconds", "", 5, "Update progress every x seconds.")
flags.BoolVarP(&af.DisableSlicing, "disable-slicing", "", false, "Disable slicing (hashes full file).")
flags.BoolVarP(&af.IgnoreEmptyFiles, "ignore-emptyfiles", "", false, "Ignore & don't report on empty/zero byte files.")
flags.BoolVarP(&af.IgnoreHiddenItems, "ignore-hiddenitems", "", true, "Ignore hidden files & folders (ones that start with '.')")
flags.StringVarP(&af.OutputFile, "output-file", "o", "", "Export as JSON")
flags.BoolVarP(&af.Silent, "silent", "q", false, "Run in silent mode.")
flags.BoolVarP(&af.Verbose, "verbose", "", false, "Run in verbose mode.")
Expand Down
2 changes: 1 addition & 1 deletion internal/smash/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func (app *App) Run() error {
}

sl := slicer.New(algorithms.Algorithm(app.Flags.Algorithm))
wk := indexer.NewConfigured(app.Flags.ExcludeDir, app.Flags.ExcludeFile)
wk := indexer.NewConfigured(app.Flags.ExcludeDir, app.Flags.ExcludeFile, app.Flags.IgnoreHiddenItems)
slo := slicer.SlicerOptions{
DisableSlicing: app.Flags.DisableSlicing,
DisableMeta: false, // TODO: Flag this
Expand Down
2 changes: 1 addition & 1 deletion internal/smash/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (app *App) printConfiguration() {
theme.Println(b.Sprint("Locations: "), theme.ColourConfig(strings.Join(app.Locations, ", ")))

if len(f.ExcludeDir) > 0 || len(f.ExcludeFile) > 0 {
theme.StyleBold.Println(b.Sprint("Excluded"))
theme.StyleBold.Println("Excluded")
if len(f.ExcludeDir) > 0 {
theme.Println(b.Sprint(" Dirs: "), theme.ColourConfigA(strings.Join(f.ExcludeDir, ", ")))
}
Expand Down
29 changes: 15 additions & 14 deletions internal/smash/flags.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
package smash

type Flags struct {
OutputFile string `yaml:"output"`
Base []string `yaml:"base"`
ExcludeDir []string `yaml:"exclude-dir"`
ExcludeFile []string `yaml:"exclude-file"`
Algorithm int `yaml:"algorithm"`
MaxThreads int `yaml:"max-threads"`
MaxWorkers int `yaml:"max-workers"`
UpdateSeconds int `yaml:"update-seconds"`
DisableSlicing bool `yaml:"disable-slicing"`
IgnoreEmptyFiles bool `yaml:"ignore-emptyfiles"`
ShowVersion bool `yaml:"show-version"`
Silent bool `yaml:"silent"`
NoProgress bool `yaml:"no-progress"`
Verbose bool `yaml:"verbose"`
OutputFile string `yaml:"output"`
Base []string `yaml:"base"`
ExcludeDir []string `yaml:"exclude-dir"`
ExcludeFile []string `yaml:"exclude-file"`
Algorithm int `yaml:"algorithm"`
MaxThreads int `yaml:"max-threads"`
MaxWorkers int `yaml:"max-workers"`
UpdateSeconds int `yaml:"update-seconds"`
DisableSlicing bool `yaml:"disable-slicing"`
IgnoreEmptyFiles bool `yaml:"ignore-emptyfiles"`
IgnoreHiddenItems bool `yaml:"ignore-hiddenitems"`
ShowVersion bool `yaml:"show-version"`
Silent bool `yaml:"silent"`
NoProgress bool `yaml:"no-progress"`
Verbose bool `yaml:"verbose"`
}
42 changes: 34 additions & 8 deletions pkg/indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,34 @@ type IndexerConfig struct {
dirMatcher *regexp.Regexp
fileMatcher *regexp.Regexp

excludeSysFilter []string
excludeSysFileFilter []string
excludeSysDirFilter []string

ExcludeDirFilter []string
ExcludeFileFilter []string

IgnoreHiddenItems bool
}

func New() *IndexerConfig {
return &IndexerConfig{
IgnoreHiddenItems: true,
ExcludeFileFilter: nil,
ExcludeDirFilter: nil,
dirMatcher: nil,
fileMatcher: nil,
excludeSysFilter: []string{
excludeSysDirFilter: []string{
"System Volume Information", "$RECYCLE.BIN", "$MFT", /* Windows */
".Trash", ".Trash-1000", /* Linux */
".Trashes", /* macOS */
},
excludeSysFileFilter: []string{
"thumbs.db", "desktop.ini", /* Windows */
".ds_store", /* macOS */
},
}
}
func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *IndexerConfig {
func NewConfigured(excludeDirFilter []string, excludeFileFilter []string, ignoreHiddenItems bool) *IndexerConfig {
indexer := New()
if len(excludeFileFilter) > 0 {
indexer.ExcludeFileFilter = excludeFileFilter
Expand All @@ -46,6 +55,7 @@ func NewConfigured(excludeDirFilter []string, excludeFileFilter []string) *Index
indexer.ExcludeDirFilter = excludeDirFilter
indexer.dirMatcher = regexp.MustCompile(strings.Join(excludeDirFilter, "|"))
}
indexer.IgnoreHiddenItems = ignoreHiddenItems
return indexer
}

Expand All @@ -58,14 +68,27 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File
return err
}
name := filepath.Clean(d.Name())

isSystemObj := config.IgnoreHiddenItems && config.isHidden(name)

if d.IsDir() {
if config.isSystemFolder(name) || (len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)) {

isIgnoreDir := config.isIgnored(name, config.excludeSysDirFilter)
isExludeDir := len(config.ExcludeDirFilter) > 0 && config.dirMatcher.MatchString(path)

if isSystemObj || isIgnoreDir || isExludeDir {
return fs.SkipDir
}

} else {
if len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(name) {

isIgnoreFile := config.isIgnored(name, config.excludeSysFileFilter)
isExludeFile := len(config.ExcludeFileFilter) > 0 && config.fileMatcher.MatchString(name)

if isSystemObj || isIgnoreFile || isExludeFile {
return nil
}

files <- FileFS{
FileSystem: f,
Path: path,
Expand All @@ -78,11 +101,14 @@ func (config *IndexerConfig) WalkDirectory(f fs.FS, root string, files chan File
return walkErr
}

func (config *IndexerConfig) isSystemFolder(folder string) bool {
for _, v := range config.excludeSysFilter {
if folder == v {
func (config *IndexerConfig) isIgnored(item string, collection []string) bool {
for _, v := range collection {
if strings.EqualFold(v, item) {
return true
}
}
return false
}
// isHidden reports whether name looks like a hidden item: it must begin with
// a '.' and be longer than one character, so a bare "." is never considered
// hidden.
func (config *IndexerConfig) isHidden(name string) bool {
	// Empty and single-character names (including ".") are never hidden.
	if len(name) <= 1 {
		return false
	}
	return name[0] == '.'
}
102 changes: 94 additions & 8 deletions pkg/indexer/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ func TestIndexDirectoryWithFilesInRoot(t *testing.T) {
"DSC19842.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -35,7 +35,7 @@ func TestIndexDirectoryWithFilesAcrossFolders(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, nil, nil, true, t)

expected := mockFiles
actual := walkedFiles
Expand All @@ -62,7 +62,7 @@ func TestIndexDirectoryWithDirExclusions(t *testing.T) {
"subfolder-2/DSC19848.ARW",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -90,7 +90,7 @@ func TestIndexDirectoryWithFileExclusions(t *testing.T) {
"exclude.me",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand Down Expand Up @@ -120,7 +120,7 @@ func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) {
"exclude-dir/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand All @@ -138,6 +138,92 @@ func TestIndexDirectoryWithFileAndDirExclusions(t *testing.T) {
}
}

// TestIndexDirectoryWithHiddenFilesThatShouldBeIndexed checks that when the
// ignoreHiddenItems argument is false, dot-prefixed files and files inside
// dot-prefixed folders are reported together with the regular files.
func TestIndexDirectoryWithHiddenFilesThatShouldBeIndexed(t *testing.T) {
	// No user-supplied exclusions for this scenario.
	dirExcludes, fileExcludes := []string{}, []string{}

	fixture := []string{
		"DSC19841.ARW",
		"DSC19842.ARW",
		".tmux",
		".config/smash/config.json",
	}

	// ignoreHiddenItems = false: hidden entries must be walked too.
	walked := walkDirectoryTestRunner(fixture, dirExcludes, fileExcludes, false, t)

	// Every fixture entry is expected, in the order the test requires the
	// walker to emit them.
	want := []string{
		".config/smash/config.json",
		".tmux",
		"DSC19841.ARW",
		"DSC19842.ARW",
	}

	if gotCount, wantCount := len(walked), len(want); gotCount != wantCount {
		t.Errorf("expected %d, got %d files", wantCount, gotCount)
	}

	if same := reflect.DeepEqual(walked, want); !same {
		t.Errorf("expected %v, got %v files", want, walked)
	}
}

// TestIndexDirectoryWithHiddenFiles checks that when the ignoreHiddenItems
// argument is true, dot-prefixed files and the contents of dot-prefixed
// folders are left out of the walk results.
func TestIndexDirectoryWithHiddenFiles(t *testing.T) {
	// Exclusion filters are supplied, but none of the fixture entries below
	// carry these names, so only the hidden-item rule is exercised.
	dirExcludes := []string{"exclude-dir"}
	fileExcludes := []string{"exclude.me"}

	fixture := []string{
		"DSC19841.ARW",
		"DSC19842.ARW",
		".tmux",
		".config/smash/config.json",
	}

	// ignoreHiddenItems = true: hidden entries must be dropped.
	walked := walkDirectoryTestRunner(fixture, dirExcludes, fileExcludes, true, t)

	// Only the two non-hidden files should remain.
	want := []string{
		"DSC19841.ARW",
		"DSC19842.ARW",
	}

	if gotCount, wantCount := len(walked), len(want); gotCount != wantCount {
		t.Errorf("expected %d, got %d files", wantCount, gotCount)
	}

	if same := reflect.DeepEqual(walked, want); !same {
		t.Errorf("expected %v, got %v files", want, walked)
	}
}
// TestIndexDirectoryWhichContainsSystemFiles checks that the files named
// "THUMBS.DB" and "desktop.ini" are omitted from the walk results while an
// ordinary file in the same directory is still reported.
func TestIndexDirectoryWhichContainsSystemFiles(t *testing.T) {
	// No user-supplied exclusions for this scenario.
	dirExcludes, fileExcludes := []string{}, []string{}

	fixture := []string{
		"DSC19841.ARW",
		"THUMBS.DB",
		"desktop.ini",
	}

	walked := walkDirectoryTestRunner(fixture, dirExcludes, fileExcludes, true, t)

	// Only the regular file is expected to come back.
	want := []string{
		"DSC19841.ARW",
	}

	if gotCount, wantCount := len(walked), len(want); gotCount != wantCount {
		t.Errorf("expected %d, got %d files", wantCount, gotCount)
	}

	if same := reflect.DeepEqual(walked, want); !same {
		t.Errorf("expected %v, got %v files", want, walked)
	}
}
func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) {
exclude_dir := []string{}
exclude_file := []string{}
Expand All @@ -149,7 +235,7 @@ func TestIndexDirectoryWhichContainsWindowsSystemFiles(t *testing.T) {
"$MFT/random.file",
}

walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, t)
walkedFiles := walkDirectoryTestRunner(mockFiles, exclude_dir, exclude_file, true, t)

expected := []string{
mockFiles[0],
Expand All @@ -174,14 +260,14 @@ func channelFileToSliceOfFiles(ch <-chan FileFS) []string {
return result
}

func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, t *testing.T) []string {
func walkDirectoryTestRunner(files []string, excludeDir []string, excludeFiles []string, ignoreHiddenItems bool, t *testing.T) []string {
fr := "mock://"
fs := createMockFS(files)
ch := make(chan FileFS)

go func() {
defer close(ch)
indexer := NewConfigured(excludeDir, excludeFiles)
indexer := NewConfigured(excludeDir, excludeFiles, ignoreHiddenItems)
err := indexer.WalkDirectory(fs, fr, ch)
if err != nil {
t.Errorf("WalkDirectory returned an error: %v", err)
Expand Down
5 changes: 5 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ $ go install github.com/thushan/smash@latest
Usage:
smash [flags] [locations-to-smash]

Usage:
smash [flags] [locations-to-smash]

Flags:
--algorithm algorithm Algorithm to use to hash files. Supported: xxhash, murmur3, md5, sha512, sha256 (full list, see readme) (default xxhash)
--base strings Base directories to use for comparison. Eg. --base=/c/dos,/c/dos/run/,/run/dos/run
Expand All @@ -57,6 +60,7 @@ Flags:
--exclude-file strings Files to exclude separated by comma. Eg. --exclude-file=.gitignore,*.csv
-h, --help help for smash
--ignore-emptyfiles Ignore & don't report on empty/zero byte files.
--ignore-hiddenitems Ignore hidden files & folders (ones that start with '.') (default true)
-p, --max-threads int Maximum threads to utilise. (default 16)
-w, --max-workers int Maximum workers to utilise when smashing. (default 8)
--no-progress Disable progress updates.
Expand All @@ -66,6 +70,7 @@ Flags:
--verbose Run in verbose mode.
-v, --version Show version information.


```

See the [full list of algorithms](./docs/algorithms.md) supported.
Expand Down