Skip to content

Commit

Permalink
moderation: added query match rule
Browse files Browse the repository at this point in the history
  • Loading branch information
0einstein0 committed Nov 7, 2024
1 parent 9adce72 commit 1e7a087
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 1 deletion.
37 changes: 37 additions & 0 deletions site/tests/moderation/test_moderation_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Test ModerationQuery model class."""


from invenio_db import db
from invenio_search import current_search_client

from zenodo_rdm.moderation.models import ModerationQuery


def test_moderation_query_creation(app):
    """Create a ModerationQuery and check the fields of the returned object."""
    query_string = "metadata.title:SimpleTest"
    notes = "test query"
    score = 5
    active = True

    with app.app_context():
        query = ModerationQuery.create(
            query_string, notes=notes, score=score, active=active
        )
        db.session.commit()

        # One assert per field: a failure then names the attribute that is
        # wrong instead of reporting a bare ``assert all([...])`` as False.
        assert query.query_string == query_string
        assert query.notes == notes
        assert query.score == score
        assert query.active == active
21 changes: 20 additions & 1 deletion site/zenodo_rdm/moderation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,14 @@

"""Moderation config."""

from .rules import files_rule, links_rule, text_sanitization_rule, verified_user_rule
from .rules import (
community_match_query_rule,
files_rule,
links_rule,
record_match_query_rule,
text_sanitization_rule,
verified_user_rule,
)

MODERATION_BANNED_LINK_DOMAINS = []
"""Banned domains for links."""
Expand Down Expand Up @@ -41,17 +48,29 @@
MODERATION_SPAM_FILE_EXTS = {"jpg", "jpeg", "pdf", "png", "jfif", "docx", "webp"}
"""Frequent spam file extensions."""

# Read by ``record_match_query_rule`` (rules.py) and passed as the index of the
# percolate search.
# NOTE(review): ``ModerationQuery.create`` (models.py) indexes queries into
# "moderation-queries", not into this index name -- presumably an alias or
# template connects the two; confirm, otherwise the rule never sees them.
MODERATION_RECORD_PERCOLATOR_INDEX = (
    "moderation-queries-rdmrecords-records-record-v7.0.0"
)
"""Percolator index for moderation queries for records."""

# Read by ``community_match_query_rule`` (rules.py); the review note on the
# record percolator index applies here as well.
MODERATION_COMMUNITY_PERCOLATOR_INDEX = (
    "moderation-queries-communities-communities-v2.0.0"
)
"""Percolator index for moderation queries for communities."""

MODERATION_RECORD_SCORE_RULES = [
    verified_user_rule,
    links_rule,
    files_rule,
    text_sanitization_rule,
    record_match_query_rule,
]
"""Scoring rules for record moderation."""

MODERATION_COMMUNITY_SCORE_RULES = [
    links_rule,
    text_sanitization_rule,
    verified_user_rule,
    community_match_query_rule,
]
"""Scoring rules for community moderation."""
67 changes: 67 additions & 0 deletions site/zenodo_rdm/moderation/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Moderation models."""

from flask import current_app
from invenio_db import db
from invenio_search import current_search_client


class ModerationQuery(db.Model):
    """Database model for a stored moderation query and its score."""

    __tablename__ = "moderation_queries"

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    """Primary key identifier for the moderation query."""

    score = db.Column(db.Integer, default=0)
    """Score associated with the query."""

    query_string = db.Column(db.Text, nullable=False)
    """Query string containing the filter criteria."""

    notes = db.Column(db.Text, nullable=True)
    """Additional notes or comments regarding the moderation query."""

    active = db.Column(db.Boolean, default=True)
    """Indicates whether the moderation query is currently active."""

    @classmethod
    def create(cls, query_string, notes=None, score=0, active=True):
        """Add a moderation query to the session and index it in search.

        The new row is only added to ``db.session``; nothing is flushed or
        committed here. Indexing is best effort: any exception raised by the
        search client is logged and the model instance is still returned.

        :param query_string: Query string stored on the row and indexed as a
            ``query_string`` query.
        :param notes: Optional free-text notes.
        :param score: Score stored with the query.
        :param active: Whether the query is active.
        :returns: The newly created (uncommitted) ``ModerationQuery``.
        """
        instance = cls(
            query_string=query_string, notes=notes, score=score, active=active
        )
        db.session.add(instance)

        document = {
            "query": {"query_string": {"query": query_string}},
            "active": active,
            "score": score,
            "notes": notes,
        }
        try:
            current_search_client.index(index="moderation-queries", body=document)
        except Exception as exc:
            # Deliberately broad: a search failure must not abort creation.
            current_app.logger.exception(exc)

        return instance

    @classmethod
    def get(cls, query_id=None):
        """Fetch one moderation query by ID, or every query when no ID is given.

        :param query_id: Primary key to look up, or ``None`` for all rows.
        :returns: A list of all queries when ``query_id`` is ``None``;
            otherwise the matching query or ``None`` if there is no match.
        """
        if query_id is None:
            return cls.query.all()
        return cls.query.filter_by(id=query_id).one_or_none()

    def __repr__(self):
        """Return a short debug representation with id, score and active flag."""
        return f"<ModerationQuery id={self.id}, score={self.score}, active={self.active}>"
42 changes: 42 additions & 0 deletions site/zenodo_rdm/moderation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re

from flask import current_app
from invenio_search import current_search_client

from zenodo_rdm.moderation.proxies import current_domain_tree

Expand Down Expand Up @@ -123,3 +124,44 @@ def files_rule(identity, draft=None, record=None):
score += current_scores.ham_files

return score


def match_query_rule(identity, draft=None, record=None, index=None):
    """Score a document by the percolator queries it matches in ``index``.

    The record (or the draft, when no record is given) is dumped and sent as
    the document of a ``percolate`` search on the ``query`` field. The
    ``score`` stored with each matching query is summed; queries stored with
    ``active`` set to ``False`` are ignored.

    :param identity: Not used by this rule.
    :param draft: Draft to evaluate when ``record`` is not given.
    :param record: Record to evaluate; takes precedence over ``draft``.
    :param index: Name of the percolator index to search.
    :returns: Sum of the scores of the matching active queries.
    :raises ValueError: If ``index`` is not given, or if neither ``record``
        nor ``draft`` is given.
    """
    if not index:
        raise ValueError("Index must be specified for matching query rule.")

    target = record or draft
    if target is None:
        # Fail with a clear message instead of ``None.dumps()``.
        raise ValueError(
            "A record or a draft must be specified for matching query rule."
        )

    # NOTE(review): no ``size`` is passed, so only the search backend's default
    # number of hits is returned and summed -- confirm that is sufficient.
    response = current_search_client.search(
        index=index,
        body={
            "query": {"percolate": {"field": "query", "document": target.dumps()}}
        },
    )

    score = 0
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        # Queries are indexed together with an ``active`` flag; a query that
        # was explicitly deactivated must not contribute to the score.
        if source.get("active") is False:
            continue
        # ``or 0`` also covers a stored null score.
        score += source.get("score") or 0

    return score


def record_match_query_rule(identity, draft=None, record=None):
    """Apply the match-query rule using the configured record percolator index."""
    percolator_index = current_app.config.get("MODERATION_RECORD_PERCOLATOR_INDEX")
    return match_query_rule(
        identity, draft=draft, record=record, index=percolator_index
    )


def community_match_query_rule(identity, draft=None, record=None):
    """Apply the match-query rule using the configured community percolator index."""
    percolator_index = current_app.config.get(
        "MODERATION_COMMUNITY_PERCOLATOR_INDEX"
    )
    return match_query_rule(
        identity, draft=draft, record=record, index=percolator_index
    )

0 comments on commit 1e7a087

Please sign in to comment.