diff --git a/docs/transformers/transform_newdomaintracker.md b/docs/transformers/transform_newdomaintracker.md index a4bde2ad..93be1bca 100644 --- a/docs/transformers/transform_newdomaintracker.md +++ b/docs/transformers/transform_newdomaintracker.md @@ -25,11 +25,16 @@ The **New Domain Tracker** transformer identifies domains that are newly observe * `cache-size` (integer) > Maximum number of domains to track +* `white-domains-file` (string) + > Path to the domain whitelist file; each entry is a regular expression that can match a full or partial domain name + + ```yaml transforms: new-domain-tracker: ttl: 3600 cache-size: 100000 + white-domains-file: "" ``` ## Cache @@ -38,6 +43,25 @@ The New Domain Tracker uses an **LRU Cache** to manage memory consumption effici The LRU Cache ensures finite memory usage but may cause some domains to be forgotten if the cache size is too small. +## Whitelist + +Example configuration that loads a whitelist of domains to ignore. + +```yaml +transforms: + new-domain-tracker: + ttl: 3600 + cache-size: 100000 + white-domains-file: /tmp/whitelist_domain.txt +``` + +Example content of the file `/tmp/whitelist_domain.txt` + +``` +(mail|www)\.google\.com +github\.com +``` + ## Persistence To ensure continuity across application restarts, you can enable the persistence feature by specifying a file path (persistence). The transformer will save the domain cache to this file and reload it on startup. 
diff --git a/tests/testsdata/newdomain_whitelist_regex.txt b/tests/testsdata/newdomain_whitelist_regex.txt new file mode 100644 index 00000000..0196c3c2 --- /dev/null +++ b/tests/testsdata/newdomain_whitelist_regex.txt @@ -0,0 +1,2 @@ +.*\.google\.com +github\.com \ No newline at end of file diff --git a/transformers/newdomain.go b/transformers/newdomain.go index 6413cdb8..51f17159 100644 --- a/transformers/newdomain.go +++ b/transformers/newdomain.go @@ -16,14 +16,15 @@ import ( // NewDomainTracker transformer to detect newly observed domains type NewDomainTracker struct { - ttl time.Duration // Time window to consider a domain as "new" - cache *lru.Cache // LRU Cache to store observed domains - logInfo func(msg string, v ...interface{}) - logError func(msg string, v ...interface{}) + ttl time.Duration // Time window to consider a domain as "new" + cache *lru.Cache // LRU Cache to store observed domains + whitelist map[string]*regexp.Regexp // Whitelisted domains + logInfo func(msg string, v ...interface{}) + logError func(msg string, v ...interface{}) } // NewNewDomainTracker initializes the NewDomainTracker transformer -func NewNewDomainTracker(ttl time.Duration, maxSize int, logInfo, logError func(msg string, v ...interface{})) (*NewDomainTracker, error) { +func NewNewDomainTracker(ttl time.Duration, maxSize int, whitelist map[string]*regexp.Regexp, logInfo, logError func(msg string, v ...interface{})) (*NewDomainTracker, error) { cache, err := lru.New(maxSize) if err != nil { return nil, err @@ -34,20 +35,38 @@ func NewNewDomainTracker(ttl time.Duration, maxSize int, logInfo, logError func( } return &NewDomainTracker{ - ttl: ttl, - cache: cache, - logInfo: logInfo, - logError: logError, + ttl: ttl, + cache: cache, + whitelist: whitelist, + logInfo: logInfo, + logError: logError, }, nil } +// isWhitelisted checks if a domain or its subdomain is in the whitelist +func (ndt *NewDomainTracker) isWhitelisted(domain string) bool { + for _, d := range ndt.whitelist { 
+ if d.MatchString(domain) { + return true + } + } + return false +} + // IsNewDomain checks if the domain is newly observed func (ndt *NewDomainTracker) IsNewDomain(domain string) bool { + // Check if the domain is whitelisted + if ndt.isWhitelisted(domain) { + return false + } + now := time.Now() // Check if the domain exists in the cache if lastSeen, exists := ndt.cache.Get(domain); exists { + fmt.Println("exists") if now.Sub(lastSeen.(time.Time)) < ndt.ttl { + fmt.Println(now.Sub(lastSeen.(time.Time)), ndt.ttl) // Domain was recently seen, not new return false } @@ -91,7 +110,7 @@ func (t *NewDomainTrackerTransform) GetTransforms() ([]Subtransform, error) { // Initialize the domain tracker ttl := time.Duration(t.config.NewDomainTracker.TTL) * time.Second maxSize := t.config.NewDomainTracker.CacheSize - tracker, err := NewNewDomainTracker(ttl, maxSize, t.LogInfo, t.LogError) + tracker, err := NewNewDomainTracker(ttl, maxSize, t.listDomainsRegex, t.LogInfo, t.LogError) if err != nil { return nil, err } diff --git a/transformers/newdomain_test.go b/transformers/newdomain_test.go index 5078bfee..fddf67e3 100644 --- a/transformers/newdomain_test.go +++ b/transformers/newdomain_test.go @@ -9,7 +9,7 @@ import ( "github.com/dmachard/go-logger" ) -func TestNewDomainTracker(t *testing.T) { +func TestNewDomainTracker_IsNew(t *testing.T) { // config config := pkgconfig.GetFakeConfigTransformers() config.NewDomainTracker.Enable = true @@ -43,5 +43,34 @@ func TestNewDomainTracker(t *testing.T) { if result, _ := tracker.trackNewDomain(&dm); result != ReturnKeep { t.Errorf("3. 
this domain should be new!!") } +} + +func TestNewDomainTracker_Whitelist(t *testing.T) { + // config + config := pkgconfig.GetFakeConfigTransformers() + config.NewDomainTracker.Enable = true + config.NewDomainTracker.TTL = 2 + config.NewDomainTracker.CacheSize = 10 + config.NewDomainTracker.WhiteDomainsFile = "../tests/testsdata/newdomain_whitelist_regex.txt" + + // init subproccesor + outChans := []chan dnsutils.DNSMessage{} + tracker := NewNewDomainTrackerTransform(config, logger.New(false), "test", 0, outChans) + _, err := tracker.GetTransforms() + if err != nil { + t.Error("fail to init transform", err) + } + // first test, check domain in whilist + dm := dnsutils.GetFakeDNSMessage() + dm.DNS.Qname = testURL1 + if result, _ := tracker.trackNewDomain(&dm); result != ReturnDrop { + t.Errorf("2. this domain should NOT be new!!") + } + + // second test, check domain in whilist + dm = dnsutils.GetFakeDNSMessage() + if result, _ := tracker.trackNewDomain(&dm); result != ReturnKeep { + t.Errorf("2. this domain should be new!!") + } } diff --git a/transformers/transformers.go b/transformers/transformers.go index d56ce2f4..ef422cf5 100644 --- a/transformers/transformers.go +++ b/transformers/transformers.go @@ -108,7 +108,7 @@ func (p *Transforms) Prepare() error { for _, transform := range p.availableTransforms { subtransforms, err := transform.GetTransforms() if err != nil { - p.LogError("error on init subtransforms:", err) + p.LogError("error on init subtransforms: %v", err) continue } for _, subtransform := range subtransforms {