From a7fa97a214b749952d5a5c7270cd0c5be7e6433d Mon Sep 17 00:00:00 2001 From: Kailash Nadh Date: Wed, 10 Nov 2021 20:29:21 +0530 Subject: [PATCH] Add scanning of full bounce email body for bounce headers. Closes #492. --- go.mod | 3 ++- go.sum | 6 ++++-- internal/bounce/mailbox/pop.go | 37 +++++++++++++++++++++++++++++----- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 0e6cb76c1..19a3c33d7 100644 --- a/go.mod +++ b/go.mod @@ -6,13 +6,14 @@ require ( github.com/Masterminds/sprig/v3 v3.2.2 github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect github.com/disintegration/imaging v1.6.2 + github.com/emersion/go-message v0.15.0 github.com/fsnotify/fsnotify v1.5.1 // indirect github.com/gofrs/uuid v4.0.0+incompatible github.com/google/uuid v1.3.0 // indirect github.com/huandu/xstrings v1.3.2 // indirect github.com/imdario/mergo v0.3.12 // indirect github.com/jmoiron/sqlx v1.3.4 - github.com/knadh/go-pop3 v0.1.0 + github.com/knadh/go-pop3 v0.3.0 github.com/knadh/goyesql/v2 v2.1.2 github.com/knadh/koanf v1.2.3 github.com/knadh/smtppool v0.3.1 diff --git a/go.sum b/go.sum index 4d3d503c4..aeaf37dbb 100644 --- a/go.sum +++ b/go.sum @@ -76,8 +76,10 @@ github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= -github.com/knadh/go-pop3 v0.1.0 h1:MECWomq2uEGeuR7O2TjfzD63H47UFLKOqH1bSH7yhRU= -github.com/knadh/go-pop3 v0.1.0/go.mod h1:a5kUJzrBB6kec+tNJl+3Z64ROgByKBdcyub+mhZMAfI= +github.com/knadh/go-pop3 v0.2.0 h1:fr4hi7hmX+yhHC8XW13+h5O1fnWQnG3cmOUt6w0Bgz4= +github.com/knadh/go-pop3 v0.2.0/go.mod h1:a5kUJzrBB6kec+tNJl+3Z64ROgByKBdcyub+mhZMAfI= +github.com/knadh/go-pop3 v0.3.0 h1:h6wh28lyT/vUBMSiSwDDUXZjHH6zL8CM8WYCPbETM4Y= +github.com/knadh/go-pop3 v0.3.0/go.mod h1:a5kUJzrBB6kec+tNJl+3Z64ROgByKBdcyub+mhZMAfI= github.com/knadh/goyesql/v2 v2.1.2 h1:XQrGiXSyeaRchdJE7odfzmodn3eAyhD5D6SxAkU2+4Q= github.com/knadh/goyesql/v2 v2.1.2/go.mod h1:is+wK/XQBukYK3DdKfpJRyDH9U/ZTMyX2u6DFijjRnI= github.com/knadh/koanf v1.2.3 h1:2Rkr0YhhYk+4QEOm800Q3Pu0Wi87svTxM6uuEb4WhYw= diff --git a/internal/bounce/mailbox/pop.go b/internal/bounce/mailbox/pop.go index a8f32b434..90f82aa83 100644 --- a/internal/bounce/mailbox/pop.go +++ b/internal/bounce/mailbox/pop.go @@ -2,8 +2,10 @@ package mailbox import ( "encoding/json" + "regexp" "time" + "github.com/emersion/go-message" "github.com/knadh/go-pop3" "github.com/knadh/listmonk/models" ) @@ -14,6 +16,11 @@ type POP struct { client *pop3.Client } +var ( + reCampUUID = regexp.MustCompile(`(?m)(?m:^` + models.EmailHeaderCampaignUUID + `:\s+?)([a-z0-9\-]{36})`) + reSubUUID = regexp.MustCompile(`(?m)(?m:^` + models.EmailHeaderSubscriberUUID + `:\s+?)([a-z0-9\-]{36})`) +) + // NewPOP returns a new instance of the POP mailbox client. func NewPOP(opt Opt) *POP { return &POP{ @@ -61,21 +68,41 @@ func (p *POP) Scan(limit int, ch chan models.Bounce) error { // Download messages. for id := 1; id <= count; id++ { - // Download just one line of the body as the body is not required at all. - m, err := c.Top(id, 1) + // Retrieve the raw bytes of the message. + b, err := c.RetrRaw(id) + if err != nil { + return err + } + + // Parse the message. + m, err := message.Read(b) if err != nil { return err } + // Check if the identifiers are available in the parsed message. var ( campUUID = m.Header.Get(models.EmailHeaderCampaignUUID) subUUID = m.Header.Get(models.EmailHeaderSubscriberUUID) - date, _ = time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", m.Header.Get("Date")) ) + // If they are not, try to extract them from the message body. + if campUUID == "" { + if u := reCampUUID.FindSubmatch(b.Bytes()); len(u) == 2 { + campUUID = string(u[1]) + } + } + if subUUID == "" { + if u := reSubUUID.FindSubmatch(b.Bytes()); len(u) == 2 { + subUUID = string(u[1]) + } + } + if campUUID == "" || subUUID == "" { continue } + + date, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", m.Header.Get("Date")) if date.IsZero() { date = time.Now() } @@ -98,8 +125,8 @@ func (p *POP) Scan(limit int, ch chan models.Bounce) error { select { case ch <- models.Bounce{ Type: "hard", - CampaignUUID: m.Header.Get(models.EmailHeaderCampaignUUID), - SubscriberUUID: m.Header.Get(models.EmailHeaderSubscriberUUID), + CampaignUUID: campUUID, + SubscriberUUID: subUUID, Source: p.opt.Host, CreatedAt: date, Meta: json.RawMessage(meta),