diff --git a/site/tests/moderation/test_moderation_queries.py b/site/tests/moderation/test_moderation_queries.py
new file mode 100644
index 00000000..88aa8c3b
--- /dev/null
+++ b/site/tests/moderation/test_moderation_queries.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2024 CERN.
+#
+# ZenodoRDM is free software; you can redistribute it and/or modify
+# it under the terms of the MIT License; see LICENSE file for more details.
+
+"""Test ModerationQuery model class."""
+
+
+from invenio_db import db
+from invenio_search import current_search_client
+
+from zenodo_rdm.moderation.models import ModerationQuery
+
+
+def test_moderation_query_creation(app):
+    """Create a ModerationQuery and check that every field is stored as given."""
+    with app.app_context():
+        query_string = "metadata.title:SimpleTest"
+        notes = "test query"
+        score = 5
+        active = True
+
+        query = ModerationQuery.create(
+            query_string, notes=notes, score=score, active=active
+        )
+        db.session.commit()
+
+        # One assert per field so a failure names the offending attribute.
+        assert query.query_string == query_string
+        assert query.notes == notes
+        assert query.score == score
+        assert query.active == active
diff --git a/site/zenodo_rdm/moderation/config.py b/site/zenodo_rdm/moderation/config.py
index 5889e3ee..ba71e0d7 100644
--- a/site/zenodo_rdm/moderation/config.py
+++ b/site/zenodo_rdm/moderation/config.py
@@ -7,7 +7,13 @@
 
 """Moderation config."""
 
-from .rules import files_rule, links_rule, text_sanitization_rule, verified_user_rule
+from .rules import (
+    files_rule,
+    links_rule,
+    match_query_rule,
+    text_sanitization_rule,
+    verified_user_rule,
+)
 
 MODERATION_BANNED_LINK_DOMAINS = []
 """Banned domains for links."""
@@ -46,6 +52,7 @@
     links_rule,
     files_rule,
     text_sanitization_rule,
+    match_query_rule,
 ]
 """Scoring rules for record moderation."""
 
@@ -53,5 +60,6 @@
     links_rule,
     text_sanitization_rule,
     verified_user_rule,
+    match_query_rule,
 ]
 """Scoring rules for communtiy moderation."""
diff --git a/site/zenodo_rdm/moderation/models.py b/site/zenodo_rdm/moderation/models.py
new file mode 100644
index 00000000..99ededc0
--- /dev/null
+++ b/site/zenodo_rdm/moderation/models.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2024 CERN.
+#
+# ZenodoRDM is free software; you can redistribute it and/or modify
+# it under the terms of the MIT License; see LICENSE file for more details.
+
+"""Moderation models."""
+
+from flask import current_app
+from invenio_db import db
+from invenio_search import current_search_client
+
+
+class ModerationQuery(db.Model):
+    """Moderation queries model."""
+
+    __tablename__ = "moderation_queries"
+
+    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
+    """Primary key identifier for the moderation query."""
+
+    score = db.Column(db.Integer, default=0)
+    """Score associated with the query."""
+
+    query_string = db.Column(db.Text, nullable=False)
+    """Query string containing the filter criteria."""
+
+    notes = db.Column(db.Text, nullable=True)
+    """Additional notes or comments regarding the moderation query."""
+
+    active = db.Column(db.Boolean, default=True)
+    """Indicates whether the moderation query is currently active."""
+
+    @classmethod
+    def create(cls, query_string, notes=None, score=0, active=True):
+        """Create a new moderation query and index it for percolation.
+
+        The row is only added to the session; committing is left to the
+        caller. Indexing is best-effort: a failure is logged and the
+        (unindexed) query is still returned.
+
+        :param query_string: Search query string used to match documents.
+        :param notes: Optional free-text notes about the query.
+        :param score: Score stored alongside the query.
+        :param active: Whether the query is currently active.
+        :returns: The new ``ModerationQuery`` instance.
+        """
+        query = cls(query_string=query_string, notes=notes, score=score, active=active)
+        db.session.add(query)
+
+        try:
+            current_search_client.index(
+                index="moderation-queries",
+                body={
+                    "query": {"query_string": {"query": query_string}},
+                    "active": active,
+                    "score": score,
+                    "notes": notes,
+                },
+            )
+        except Exception:
+            # Best-effort: keep the DB row, but record the failure (with
+            # traceback) in the application log instead of stdout.
+            current_app.logger.exception(
+                "Failed to index moderation query: %s", query_string
+            )
+
+        return query
+
+    @classmethod
+    def get(cls, query_id=None):
+        """Retrieve a moderation query by ID or return all queries if no ID is provided."""
+        if query_id is not None:
+            return cls.query.filter_by(id=query_id).one_or_none()
+        return cls.query.all()
+
+    def __repr__(self):
+        """Get a string representation of the moderation query."""
+        return (
+            f"<ModerationQuery id={self.id}, score={self.score}, "
+            f"active={self.active}>"
+        )
diff --git a/site/zenodo_rdm/moderation/rules.py b/site/zenodo_rdm/moderation/rules.py
index a0091e2c..2ee5a8fa 100644
--- a/site/zenodo_rdm/moderation/rules.py
+++ b/site/zenodo_rdm/moderation/rules.py
@@ -10,6 +10,7 @@
 import re
 
 from flask import current_app
+from invenio_search import current_search_client
 
 from zenodo_rdm.moderation.proxies import current_domain_tree
 
@@ -123,3 +124,35 @@ def files_rule(identity, draft=None, record=None):
         score += current_scores.ham_files
 
     return score
+
+
+def match_query_rule(identity, draft=None, record=None):
+    """Calculate a score based on matched percolate queries against the record.
+
+    :param identity: Identity performing the action (unused here).
+    :param draft: Draft being moderated (unused here).
+    :param record: Record whose dump is percolated against the stored queries.
+    :returns: Sum of the ``score`` values of the matching active queries.
+    """
+    if record is None:
+        # Nothing to percolate, so no stored query can match.
+        return 0
+
+    matched_queries = current_search_client.search(
+        index="moderation-queries",
+        body={
+            "query": {
+                "bool": {
+                    "must": [
+                        {"percolate": {"field": "query", "document": record.dumps()}}
+                    ],
+                    # Deactivated queries must not contribute to the score.
+                    "filter": [{"term": {"active": True}}],
+                }
+            }
+        },
+    )
+
+    return sum(
+        hit["_source"].get("score", 0) for hit in matched_queries["hits"]["hits"]
+    )