Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new meta check for video only #166

Merged
merged 1 commit into from
Nov 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ This option is disabled by default. If set to `true`, the bot will check the mes

This option is disabled by default. If set to `true`, the bot will check the message for the presence of any image. If the message contains images but no text, it will be marked as spam.

**Video only check**

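This option is disabled by default and can be enabled with the `--meta.video-only` parameter (or the `$META_VIDEO_ONLY` environment variable). If set to `true`, the bot will check the message for the presence of a video or a video note. If the message contains a video or a video note but no text, it will be marked as spam.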

**Multi-language words**

Using words that mix characters from multiple languages is a common spam technique. To detect such messages, the bot can check the message for the presence of such words. This option is disabled by default and can be enabled with the `--multi-lang` parameter (or the `$MULTI_LANG` environment variable). Setting it to a number above `0` enables this check, and the bot will mark the message as spam if more than the specified number of its words contain characters from more than one language.
Expand Down Expand Up @@ -269,6 +273,7 @@ cas:
meta:
--meta.links-limit= max links in message, disabled by default (default: -1) [$META_LINKS_LIMIT]
--meta.image-only enable image only check [$META_IMAGE_ONLY]
--meta.video-only enable video only check [$META_VIDEO_ONLY]
--meta.links-only enable links only check [$META_LINKS_ONLY]

openai:
Expand Down
3 changes: 3 additions & 0 deletions app/bot/bot.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ type Message struct {
Sent time.Time
SenderChat SenderChat `json:"sender_chat,omitempty"`
} `json:",omitempty"`

WithVideo bool `json:",omitempty"`
WithVideoNote bool `json:",omitempty"`
}

// Entity represents one special entity in a text message.
Expand Down
3 changes: 3 additions & 0 deletions app/bot/spam.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ func (s *SpamFilter) OnMessage(msg Message) (response Response) {
if msg.Image != nil {
spamReq.Meta.Images = 1
}
if msg.WithVideo || msg.WithVideoNote {
spamReq.Meta.HasVideo = true
}
spamReq.Meta.Links = strings.Count(msg.Text, "http://") + strings.Count(msg.Text, "https://")
isSpam, checkResults := s.Check(spamReq)
crs := []string{}
Expand Down
4 changes: 4 additions & 0 deletions app/events/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,10 @@ func transform(msg *tbapi.Message) *bot.Message {
Caption: msg.Caption,
Entities: transformEntities(msg.CaptionEntities),
}
case msg.Video != nil:
message.WithVideo = true
case msg.VideoNote != nil:
message.WithVideoNote = true
}

// fill in the message's reply-to message
Expand Down
6 changes: 6 additions & 0 deletions app/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ type options struct {
LinksLimit int `long:"links-limit" env:"LINKS_LIMIT" default:"-1" description:"max links in message, disabled by default"`
ImageOnly bool `long:"image-only" env:"IMAGE_ONLY" description:"enable image only check"`
LinksOnly bool `long:"links-only" env:"LINKS_ONLY" description:"enable links only check"`
VideosOnly bool `long:"video-only" env:"VIDEO_ONLY" description:"enable video only check"`
} `group:"meta" namespace:"meta" env-namespace:"META"`

OpenAI struct {
Expand Down Expand Up @@ -361,6 +362,7 @@ func activateServer(ctx context.Context, opts options, sf *bot.SpamFilter, loc *
MetaLinksLimit: opts.Meta.LinksLimit,
MetaLinksOnly: opts.Meta.LinksOnly,
MetaImageOnly: opts.Meta.ImageOnly,
MetaVideoOnly: opts.Meta.VideosOnly,
MultiLangLimit: opts.MultiLangWords,
OpenAIEnabled: opts.OpenAI.Token != "" || opts.OpenAI.APIBase != "",
SamplesDataPath: opts.Files.SamplesDataPath,
Expand Down Expand Up @@ -450,6 +452,10 @@ func makeDetector(opts options) *tgspam.Detector {
log.Printf("[INFO] image only check enabled")
metaChecks = append(metaChecks, tgspam.ImagesCheck())
}
if opts.Meta.VideosOnly {
log.Printf("[INFO] videos only check enabled")
metaChecks = append(metaChecks, tgspam.VideosCheck())
}
if opts.Meta.LinksLimit >= 0 {
log.Printf("[INFO] links check enabled, limit: %d", opts.Meta.LinksLimit)
metaChecks = append(metaChecks, tgspam.LinksCheck(opts.Meta.LinksLimit))
Expand Down
1 change: 1 addition & 0 deletions app/webapi/webapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ type Settings struct {
MetaLinksLimit int `json:"meta_links_limit"`
MetaLinksOnly bool `json:"meta_links_only"`
MetaImageOnly bool `json:"meta_image_only"`
MetaVideoOnly bool `json:"meta_video_only"`
MultiLangLimit int `json:"multi_lang_limit"`
OpenAIEnabled bool `json:"openai_enabled"`
SamplesDataPath string `json:"samples_data_path"`
Expand Down
9 changes: 5 additions & 4 deletions lib/spamcheck/spamcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ type Request struct {

// MetaData is meta-information about the message, provided by the client.
// Each field is declared exactly once; a repeated field name in a struct
// is a compile error.
type MetaData struct {
	Images   int  `json:"images"`    // number of images in the message
	Links    int  `json:"links"`     // number of links in the message
	HasVideo bool `json:"has_video"` // true if the message has a video or video note
}

// String returns a one-line, human-readable representation of the request.
// The message text and user name are quoted with %q; the user ID, image count,
// link count and video flag are appended as-is. The video flag is part of the
// output so that it shows up wherever the request is printed.
func (r *Request) String() string {
	return fmt.Sprintf("msg:%q, user:%q, id:%s, images:%d, links:%d, has_video:%v",
		r.Msg, r.UserName, r.UserID, r.Meta.Images, r.Meta.Links, r.Meta.HasVideo)
}

// Response is a result of spam check.
Expand Down
12 changes: 6 additions & 6 deletions lib/spamcheck/spamcheck_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,18 @@ func TestRequestString(t *testing.T) {
}{
{
name: "Normal message",
request: Request{"Hello, world!", "123", "Alice", MetaData{2, 1}},
expected: `msg:"Hello, world!", user:"Alice", id:123, images:2, links:1`,
request: Request{"Hello, world!", "123", "Alice", MetaData{2, 1, false}},
expected: `msg:"Hello, world!", user:"Alice", id:123, images:2, links:1, has_video:false`,
},
{
name: "Spam message",
request: Request{"Spam message", "456", "Bob", MetaData{0, 3}},
expected: `msg:"Spam message", user:"Bob", id:456, images:0, links:3`,
request: Request{"Spam message", "456", "Bob", MetaData{0, 3, true}},
expected: `msg:"Spam message", user:"Bob", id:456, images:0, links:3, has_video:true`,
},
{
name: "Empty fields",
request: Request{"", "", "", MetaData{0, 0}},
expected: `msg:"", user:"", id:, images:0, links:0`,
request: Request{"", "", "", MetaData{0, 0, false}},
expected: `msg:"", user:"", id:, images:0, links:0, has_video:false`,
},
}

Expand Down
15 changes: 15 additions & 0 deletions lib/tgspam/metachecks.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,18 @@ func ImagesCheck() MetaCheck {
return spamcheck.Response{Spam: false, Name: "images", Details: "no images without text"}
}
}

// VideosCheck returns a MetaCheck that flags video-only messages.
// A request is reported as spam when its metadata says it carries a video
// (or video note) and its text is empty; every other request is reported as
// not spam. The returned response is always named "videos".
func VideosCheck() MetaCheck {
	return func(req spamcheck.Request) spamcheck.Response {
		videoOnly := req.Meta.HasVideo && req.Msg == ""
		if !videoOnly {
			return spamcheck.Response{Spam: false, Name: "videos", Details: "no videos without text"}
		}
		return spamcheck.Response{Name: "videos", Spam: true, Details: "videos without text"}
	}
}
64 changes: 64 additions & 0 deletions lib/tgspam/metachecks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,67 @@ func TestImagesCheck(t *testing.T) {
})
}
}

// TestVideosCheck verifies the check returned by VideosCheck for every
// combination of "has video" and "has text", including the case of an empty
// message without a video, which must not be flagged.
func TestVideosCheck(t *testing.T) {
	// the check produces only these two responses
	notSpam := spamcheck.Response{Name: "videos", Spam: false, Details: "no videos without text"}
	spam := spamcheck.Response{Name: "videos", Spam: true, Details: "videos without text"}

	tests := []struct {
		name     string
		req      spamcheck.Request
		expected spamcheck.Response
	}{
		{
			name: "No videos and text",
			req: spamcheck.Request{
				Msg:  "This is a message with text.",
				Meta: spamcheck.MetaData{HasVideo: false},
			},
			expected: notSpam,
		},
		{
			// an empty message alone is not enough, a video must be present too
			name: "No videos and no text",
			req: spamcheck.Request{
				Msg:  "",
				Meta: spamcheck.MetaData{HasVideo: false},
			},
			expected: notSpam,
		},
		{
			name: "Videos with text",
			req: spamcheck.Request{
				Msg:  "This is a message with text and a video.",
				Meta: spamcheck.MetaData{HasVideo: true},
			},
			expected: notSpam,
		},
		{
			name: "Videos without text",
			req: spamcheck.Request{
				Msg:  "",
				Meta: spamcheck.MetaData{HasVideo: true},
			},
			expected: spam,
		},
		{
			// HasVideo is a single flag, so at this level a video note request
			// is built the same way as a plain video request
			name: "Video note without text",
			req: spamcheck.Request{
				Msg:  "",
				Meta: spamcheck.MetaData{HasVideo: true},
			},
			expected: spam,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			check := VideosCheck()
			assert.Equal(t, tt.expected, check(tt.req))
		})
	}
}
Loading