diff --git a/README.md b/README.md index 3ab6b122..1ec6d382 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,10 @@ This option is disabled by default. If set to `true`, the bot will check the mes This option is disabled by default. If set to `true`, the bot will check the message for the presence of any image. If the message contains images but no text, it will be marked as spam. +**Video only check** + +This option is disabled by default. If set to `true`, the bot will check the message for the presence of a video or video note. If the message contains a video or video note but no text, it will be marked as spam. + **Multi-language words** Using words that mix characters from multiple languages is a common spam technique. To detect such messages, the bot can check the message for the presence of such words. This option is disabled by default and can be enabled with the `--multi-lang=, [$MULTI_LANG]` parameter. Setting it to a number above `0` will enable this check, and the bot will mark the message as spam if it contains words with characters from more than one language in more than the specified number of words. @@ -269,6 +273,7 @@ cas: meta: --meta.links-limit= max links in message, disabled by default (default: -1) [$META_LINKS_LIMIT] --meta.image-only enable image only check [$META_IMAGE_ONLY] + --meta.video-only enable video only check [$META_VIDEO_ONLY] --meta.links-only enable links only check [$META_LINKS_ONLY] openai: diff --git a/app/bot/bot.go b/app/bot/bot.go index fab047ea..a2360dce 100644 --- a/app/bot/bot.go +++ b/app/bot/bot.go @@ -56,6 +56,9 @@ type Message struct { Sent time.Time SenderChat SenderChat `json:"sender_chat,omitempty"` } `json:",omitempty"` + + WithVideo bool `json:",omitempty"` + WithVideoNote bool `json:",omitempty"` } // Entity represents one special entity in a text message. 
diff --git a/app/bot/spam.go b/app/bot/spam.go index 0ea33d1e..6d85f812 100644 --- a/app/bot/spam.go +++ b/app/bot/spam.go @@ -83,6 +83,9 @@ func (s *SpamFilter) OnMessage(msg Message) (response Response) { if msg.Image != nil { spamReq.Meta.Images = 1 } + if msg.WithVideo || msg.WithVideoNote { + spamReq.Meta.HasVideo = true + } spamReq.Meta.Links = strings.Count(msg.Text, "http://") + strings.Count(msg.Text, "https://") isSpam, checkResults := s.Check(spamReq) crs := []string{} diff --git a/app/events/events.go b/app/events/events.go index 28de049d..c5ce4c32 100644 --- a/app/events/events.go +++ b/app/events/events.go @@ -268,6 +268,10 @@ func transform(msg *tbapi.Message) *bot.Message { Caption: msg.Caption, Entities: transformEntities(msg.CaptionEntities), } + case msg.Video != nil: + message.WithVideo = true + case msg.VideoNote != nil: + message.WithVideoNote = true } // fill in the message's reply-to message diff --git a/app/main.go b/app/main.go index 5d85dc63..f2df5cb1 100644 --- a/app/main.go +++ b/app/main.go @@ -69,6 +69,7 @@ type options struct { LinksLimit int `long:"links-limit" env:"LINKS_LIMIT" default:"-1" description:"max links in message, disabled by default"` ImageOnly bool `long:"image-only" env:"IMAGE_ONLY" description:"enable image only check"` LinksOnly bool `long:"links-only" env:"LINKS_ONLY" description:"enable links only check"` + VideosOnly bool `long:"video-only" env:"VIDEO_ONLY" description:"enable video only check"` } `group:"meta" namespace:"meta" env-namespace:"META"` OpenAI struct { @@ -361,6 +362,7 @@ func activateServer(ctx context.Context, opts options, sf *bot.SpamFilter, loc * MetaLinksLimit: opts.Meta.LinksLimit, MetaLinksOnly: opts.Meta.LinksOnly, MetaImageOnly: opts.Meta.ImageOnly, + MetaVideoOnly: opts.Meta.VideosOnly, MultiLangLimit: opts.MultiLangWords, OpenAIEnabled: opts.OpenAI.Token != "" || opts.OpenAI.APIBase != "", SamplesDataPath: opts.Files.SamplesDataPath, @@ -450,6 +452,10 @@ func makeDetector(opts options) 
*tgspam.Detector { log.Printf("[INFO] image only check enabled") metaChecks = append(metaChecks, tgspam.ImagesCheck()) } + if opts.Meta.VideosOnly { + log.Printf("[INFO] videos only check enabled") + metaChecks = append(metaChecks, tgspam.VideosCheck()) + } if opts.Meta.LinksLimit >= 0 { log.Printf("[INFO] links check enabled, limit: %d", opts.Meta.LinksLimit) metaChecks = append(metaChecks, tgspam.LinksCheck(opts.Meta.LinksLimit)) diff --git a/app/webapi/webapi.go b/app/webapi/webapi.go index aba7ef12..7dacc22b 100644 --- a/app/webapi/webapi.go +++ b/app/webapi/webapi.go @@ -69,6 +69,7 @@ type Settings struct { MetaLinksLimit int `json:"meta_links_limit"` MetaLinksOnly bool `json:"meta_links_only"` MetaImageOnly bool `json:"meta_image_only"` + MetaVideoOnly bool `json:"meta_video_only"` MultiLangLimit int `json:"multi_lang_limit"` OpenAIEnabled bool `json:"openai_enabled"` SamplesDataPath string `json:"samples_data_path"` diff --git a/lib/spamcheck/spamcheck.go b/lib/spamcheck/spamcheck.go index 76bb9edc..c4c6979d 100644 --- a/lib/spamcheck/spamcheck.go +++ b/lib/spamcheck/spamcheck.go @@ -12,13 +12,14 @@ type Request struct { // MetaData is a meta-info about the message, provided by the client. type MetaData struct { - Images int `json:"images"` // number of images in the message - Links int `json:"links"` // number of links in the message + Images int `json:"images"` // number of images in the message + Links int `json:"links"` // number of links in the message + HasVideo bool `json:"has_video"` // true if the message has a video or video note } func (r *Request) String() string { - return fmt.Sprintf("msg:%q, user:%q, id:%s, images:%d, links:%d", - r.Msg, r.UserName, r.UserID, r.Meta.Images, r.Meta.Links) + return fmt.Sprintf("msg:%q, user:%q, id:%s, images:%d, links:%d, has_video:%v", + r.Msg, r.UserName, r.UserID, r.Meta.Images, r.Meta.Links, r.Meta.HasVideo) } // Response is a result of spam check. 
diff --git a/lib/spamcheck/spamcheck_test.go b/lib/spamcheck/spamcheck_test.go index 3b78982d..e521b718 100644 --- a/lib/spamcheck/spamcheck_test.go +++ b/lib/spamcheck/spamcheck_test.go @@ -48,18 +48,18 @@ func TestRequestString(t *testing.T) { }{ { name: "Normal message", - request: Request{"Hello, world!", "123", "Alice", MetaData{2, 1}}, - expected: `msg:"Hello, world!", user:"Alice", id:123, images:2, links:1`, + request: Request{"Hello, world!", "123", "Alice", MetaData{2, 1, false}}, + expected: `msg:"Hello, world!", user:"Alice", id:123, images:2, links:1, has_video:false`, }, { name: "Spam message", - request: Request{"Spam message", "456", "Bob", MetaData{0, 3}}, - expected: `msg:"Spam message", user:"Bob", id:456, images:0, links:3`, + request: Request{"Spam message", "456", "Bob", MetaData{0, 3, true}}, + expected: `msg:"Spam message", user:"Bob", id:456, images:0, links:3, has_video:true`, }, { name: "Empty fields", - request: Request{"", "", "", MetaData{0, 0}}, - expected: `msg:"", user:"", id:, images:0, links:0`, + request: Request{"", "", "", MetaData{0, 0, false}}, + expected: `msg:"", user:"", id:, images:0, links:0, has_video:false`, }, } diff --git a/lib/tgspam/metachecks.go b/lib/tgspam/metachecks.go index c582c3dc..03867e42 100644 --- a/lib/tgspam/metachecks.go +++ b/lib/tgspam/metachecks.go @@ -71,3 +71,18 @@ func ImagesCheck() MetaCheck { return spamcheck.Response{Spam: false, Name: "images", Details: "no images without text"} } } + +// VideosCheck is a function that returns a MetaCheck function. +// It checks if the message has a video or video note and the message is empty (i.e. it contains only videos). 
+func VideosCheck() MetaCheck { + return func(req spamcheck.Request) spamcheck.Response { + if req.Meta.HasVideo && req.Msg == "" { + return spamcheck.Response{ + Name: "videos", + Spam: true, + Details: "videos without text", + } + } + return spamcheck.Response{Spam: false, Name: "videos", Details: "no videos without text"} + } +} diff --git a/lib/tgspam/metachecks_test.go b/lib/tgspam/metachecks_test.go index 7d993775..77fb61a3 100644 --- a/lib/tgspam/metachecks_test.go +++ b/lib/tgspam/metachecks_test.go @@ -185,3 +185,67 @@ func TestImagesCheck(t *testing.T) { }) } } + +func TestVideosCheck(t *testing.T) { + tests := []struct { + name string + req spamcheck.Request + expected spamcheck.Response + }{ + { + name: "No videos and text", + req: spamcheck.Request{ + Msg: "This is a message with text.", + Meta: spamcheck.MetaData{ + HasVideo: false, + }, + }, + expected: spamcheck.Response{Name: "videos", Spam: false, Details: "no videos without text"}, + }, + { + name: "Videos with text", + req: spamcheck.Request{ + Msg: "This is a message with text and a video.", + Meta: spamcheck.MetaData{ + HasVideo: true, + }, + }, + expected: spamcheck.Response{Name: "videos", Spam: false, Details: "no videos without text"}, + }, + { + name: "Videos without text", + req: spamcheck.Request{ + Msg: "", + Meta: spamcheck.MetaData{ + HasVideo: true, + }, + }, + expected: spamcheck.Response{ + Name: "videos", + Spam: true, + Details: "videos without text", + }, + }, + { + name: "Video note without text", + req: spamcheck.Request{ + Msg: "", + Meta: spamcheck.MetaData{ + HasVideo: true, + }, + }, + expected: spamcheck.Response{ + Name: "videos", + Spam: true, + Details: "videos without text", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + check := VideosCheck() + assert.Equal(t, tt.expected, check(tt.req)) + }) + } +}