Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Classifier unlearn #220

Merged
merged 4 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions app/bot/mocks/detector.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 12 additions & 17 deletions app/bot/spam.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ type Detector interface {
LoadStopWords(readers ...io.Reader) (tgspam.LoadResult, error)
UpdateSpam(msg string) error
UpdateHam(msg string) error
RemoveHam(msg string) error
RemoveSpam(msg string) error
AddApprovedUser(user approved.UserInfo) error
RemoveApprovedUser(id string) error
ApprovedUsers() (res []approved.UserInfo)
Expand All @@ -55,7 +57,6 @@ type Detector interface {
type SamplesStore interface {
// Read takes a sample type and a sample origin and returns the matching entries as a slice of strings.
Read(ctx context.Context, t storage.SampleType, o storage.SampleOrigin) ([]string, error)
// Reader takes the same selectors as Read but returns an io.ReadCloser instead of a slice;
// presumably the caller is responsible for closing it — confirm against the implementation.
Reader(ctx context.Context, t storage.SampleType, o storage.SampleOrigin) (io.ReadCloser, error)
// DeleteMessage takes a message string and returns an error.
// NOTE(review): this hunk's header (-55,7 +57,6) drops exactly one line, and the reworked
// RemoveDynamicSpamSample/RemoveDynamicHamSample call Detector.RemoveSpam/RemoveHam instead of
// this method — presumably DeleteMessage is the line removed by the change; confirm against the merged file.
DeleteMessage(ctx context.Context, message string) error
// Stats returns a pointer to storage.SamplesStats.
Stats(ctx context.Context) (*storage.SamplesStats, error)
}

Expand Down Expand Up @@ -112,7 +113,7 @@ func (s *SpamFilter) OnMessage(msg Message, checkOnly bool) (response Response)
// UpdateSpam appends a message to the spam samples file and updates the classifier
func (s *SpamFilter) UpdateSpam(msg string) error {
cleanMsg := strings.ReplaceAll(msg, "\n", " ")
log.Printf("[DEBUG] update spam samples with %q", cleanMsg)
log.Printf("[INFO] update spam samples with %q", cleanMsg)
if err := s.Detector.UpdateSpam(cleanMsg); err != nil {
return fmt.Errorf("can't update spam samples: %w", err)
}
Expand All @@ -122,7 +123,7 @@ func (s *SpamFilter) UpdateSpam(msg string) error {
// UpdateHam appends a message to the ham samples file and updates the classifier
func (s *SpamFilter) UpdateHam(msg string) error {
cleanMsg := strings.ReplaceAll(msg, "\n", " ")
log.Printf("[DEBUG] update ham samples with %q", cleanMsg)
log.Printf("[INFO] update ham samples with %q", cleanMsg)
if err := s.Detector.UpdateHam(cleanMsg); err != nil {
return fmt.Errorf("can't update ham samples: %w", err)
}
Expand Down Expand Up @@ -235,26 +236,20 @@ func (s *SpamFilter) DynamicSamples() (spam, ham []string, err error) {

// RemoveDynamicSpamSample removes a sample from the spam dynamic samples file and reloads samples after this
func (s *SpamFilter) RemoveDynamicSpamSample(sample string) error {
log.Printf("[DEBUG] remove dynamic spam sample: %q", sample)

if err := s.params.SamplesStore.DeleteMessage(context.TODO(), sample); err != nil {
return fmt.Errorf("failed to delete message: %w", err)
}
if err := s.ReloadSamples(); err != nil {
return fmt.Errorf("failed to reload samples: %w", err)
cleanMsg := strings.ReplaceAll(sample, "\n", " ")
log.Printf("[INFO] remove dynamic spam sample: %q", sample)
if err := s.Detector.RemoveSpam(cleanMsg); err != nil {
return fmt.Errorf("can't remove spam sample %q: %w", sample, err)
}
return nil
}

// RemoveDynamicHamSample removes a sample from the ham dynamic samples file and reloads samples after this
func (s *SpamFilter) RemoveDynamicHamSample(sample string) error {
log.Printf("[DEBUG] remove dynamic ham sample: %q", sample)

if err := s.params.SamplesStore.DeleteMessage(context.TODO(), sample); err != nil {
return fmt.Errorf("failed to delete message: %w", err)
}
if err := s.ReloadSamples(); err != nil {
return fmt.Errorf("failed to reload samples: %w", err)
cleanMsg := strings.ReplaceAll(sample, "\n", " ")
log.Printf("[INFO] remove dynamic ham sample: %q", sample)
if err := s.Detector.RemoveHam(cleanMsg); err != nil {
return fmt.Errorf("can't remove hma sample %q: %w", sample, err)
}
return nil
}
74 changes: 26 additions & 48 deletions app/bot/spam_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,12 +533,6 @@ func TestSpamFilter_RemoveDynamicSample(t *testing.T) {
deleteErr: errors.New("delete error"),
expectError: true,
},
{
name: "reload error",
sample: "spam message",
loadErr: errors.New("reload error"),
expectError: true,
},
}

for _, tc := range tests {
Expand All @@ -550,19 +544,21 @@ func TestSpamFilter_RemoveDynamicSample(t *testing.T) {
LoadStopWordsFunc: func(readers ...io.Reader) (tgspam.LoadResult, error) {
return tgspam.LoadResult{}, nil
},
RemoveSpamFunc: func(msg string) error {
assert.Equal(t, tc.sample, msg)
return tc.deleteErr
},
RemoveHamFunc: func(msg string) error {
assert.Equal(t, tc.sample, msg)
return tc.deleteErr
},
}

samplesStore := &mocks.SamplesStoreMock{
DeleteMessageFunc: func(ctx context.Context, message string) error {
assert.Equal(t, tc.sample, message)
return tc.deleteErr
},
StatsFunc: func(ctx context.Context) (*storage.SamplesStats, error) {
return &storage.SamplesStats{PresetSpam: 1, PresetHam: 1}, nil
},
ReaderFunc: func(ctx context.Context, t storage.SampleType, o storage.SampleOrigin) (io.ReadCloser, error) {
return io.NopCloser(strings.NewReader("")), nil
},
}

dictStore := &mocks.DictStoreMock{
Expand All @@ -583,8 +579,8 @@ func TestSpamFilter_RemoveDynamicSample(t *testing.T) {
return
}
assert.NoError(t, err)
assert.Equal(t, 1, len(samplesStore.DeleteMessageCalls()))
assert.Equal(t, tc.sample, samplesStore.DeleteMessageCalls()[0].Message)
assert.Len(t, det.RemoveSpamCalls(), 1)
assert.Equal(t, tc.sample, det.RemoveSpamCalls()[0].Msg)
})
}
}
Expand Down Expand Up @@ -682,7 +678,7 @@ func TestSpamFilter_DynamicSamples(t *testing.T) {

// verify all Read calls were made
calls := samplesStore.ReadCalls()
require.Equal(t, 2, len(calls))
require.Len(t, calls, 2)
assert.Equal(t, storage.SampleTypeSpam, calls[0].T)
assert.Equal(t, storage.SampleTypeHam, calls[1].T)
assert.Equal(t, storage.SampleOriginUser, calls[0].O)
Expand All @@ -697,7 +693,6 @@ func TestSpamFilter_RemoveDynamicSamples(t *testing.T) {
sample string
sampleType string // "spam" or "ham"
deleteErr error
reloadErr error
expectError bool
}{
{
Expand All @@ -712,13 +707,6 @@ func TestSpamFilter_RemoveDynamicSamples(t *testing.T) {
deleteErr: errors.New("delete error"),
expectError: true,
},
{
name: "remove spam reload error",
sample: "spam sample",
sampleType: "spam",
reloadErr: errors.New("reload error"),
expectError: true,
},
{
name: "remove ham success",
sample: "ham sample",
Expand All @@ -736,11 +724,13 @@ func TestSpamFilter_RemoveDynamicSamples(t *testing.T) {
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
det := &mocks.DetectorMock{
LoadSamplesFunc: func(exclReader io.Reader, spamReaders []io.Reader, hamReaders []io.Reader) (tgspam.LoadResult, error) {
return tgspam.LoadResult{}, tc.reloadErr
RemoveHamFunc: func(msg string) error {
assert.Equal(t, tc.sample, msg)
return tc.deleteErr
},
LoadStopWordsFunc: func(readers ...io.Reader) (tgspam.LoadResult, error) {
return tgspam.LoadResult{}, nil
RemoveSpamFunc: func(msg string) error {
assert.Equal(t, tc.sample, msg)
return tc.deleteErr
},
}

Expand All @@ -749,19 +739,9 @@ func TestSpamFilter_RemoveDynamicSamples(t *testing.T) {
assert.Equal(t, tc.sample, message)
return tc.deleteErr
},
StatsFunc: func(ctx context.Context) (*storage.SamplesStats, error) {
return &storage.SamplesStats{PresetSpam: 1, PresetHam: 1}, nil
},
ReaderFunc: func(ctx context.Context, t storage.SampleType, o storage.SampleOrigin) (io.ReadCloser, error) {
return io.NopCloser(strings.NewReader("")), nil
},
}

dictStore := &mocks.DictStoreMock{
ReaderFunc: func(ctx context.Context, t storage.DictionaryType) (io.ReadCloser, error) {
return io.NopCloser(strings.NewReader("")), nil
},
}
dictStore := &mocks.DictStoreMock{}

s := NewSpamFilter(det, SpamConfig{
SamplesStore: samplesStore,
Expand All @@ -782,16 +762,14 @@ func TestSpamFilter_RemoveDynamicSamples(t *testing.T) {
return
}
assert.NoError(t, err)

// verify DeleteMessage was called
calls := samplesStore.DeleteMessageCalls()
require.Equal(t, 1, len(calls))
assert.Equal(t, tc.sample, calls[0].Message)

// verify stats and reload happened
assert.Equal(t, 1, len(samplesStore.StatsCalls()))
assert.Equal(t, 1, len(det.LoadSamplesCalls()))
assert.Equal(t, 1, len(det.LoadStopWordsCalls()))
if tc.sampleType == "spam" {
assert.Len(t, det.RemoveSpamCalls(), 1)
assert.Equal(t, tc.sample, det.RemoveSpamCalls()[0].Msg)
}
if tc.sampleType == "ham" {
assert.Len(t, det.RemoveHamCalls(), 1)
assert.Equal(t, tc.sample, det.RemoveHamCalls()[0].Msg)
}
})
}
}
Loading
Loading