Skip to content

Commit

Permalink
moderation: added query match rule
Browse files Browse the repository at this point in the history
  • Loading branch information
0einstein0 committed Nov 7, 2024
1 parent a6e1e5b commit 857502d
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 1 deletion.
37 changes: 37 additions & 0 deletions site/tests/moderation/test_moderation_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Test ModerationQuery model class."""


from invenio_db import db
from invenio_search import current_search_client

from zenodo_rdm.moderation.models import ModerationQuery


def test_moderation_query_creation(app):
    """Create a ModerationQuery and check that its fields are stored as given."""
    with app.app_context():
        query_string = "metadata.title:SimpleTest"
        notes = "test query"
        score = 5
        active = True

        query = ModerationQuery.create(
            query_string, notes=notes, score=score, active=active
        )
        db.session.commit()

        # One assertion per field so a failure reports exactly which
        # attribute diverged (a single ``assert all([...])`` only says False).
        assert query.query_string == query_string
        assert query.notes == notes
        assert query.score == score
        assert query.active == active
10 changes: 9 additions & 1 deletion site/zenodo_rdm/moderation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@

"""Moderation config."""

from .rules import files_rule, links_rule, text_sanitization_rule, verified_user_rule
from .rules import (
files_rule,
links_rule,
match_query_rule,
text_sanitization_rule,
verified_user_rule,
)

MODERATION_BANNED_LINK_DOMAINS = []
"""Banned domains for links."""
Expand Down Expand Up @@ -46,12 +52,14 @@
links_rule,
files_rule,
text_sanitization_rule,
match_query_rule,
]
"""Scoring rules for record moderation."""

MODERATION_COMMUNITY_SCORE_RULES = [
    links_rule,
    text_sanitization_rule,
    verified_user_rule,
    match_query_rule,
]
"""Scoring rules for community moderation."""
68 changes: 68 additions & 0 deletions site/zenodo_rdm/moderation/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Moderation models."""

from flask import current_app
from invenio_db import db
from invenio_search import current_search_client


class ModerationQuery(db.Model):
    """Moderation queries model.

    Each row holds a search query string together with the score attributed
    to it, optional free-form notes and an ``active`` flag.
    """

    __tablename__ = "moderation_queries"

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    """Primary key identifier for the moderation query."""

    score = db.Column(db.Integer, default=0)
    """Score associated with the query."""

    query_string = db.Column(db.Text, nullable=False)
    """Query string containing the filter criteria."""

    notes = db.Column(db.Text, nullable=True)
    """Additional notes or comments regarding the moderation query."""

    active = db.Column(db.Boolean, default=True)
    """Indicates whether the moderation query is currently active."""

    @classmethod
    def create(cls, query_string, notes=None, score=0, active=True):
        """Create a new moderation query.

        The new row is added to the current database session but not
        committed; committing is left to the caller. The query is also
        indexed in the ``moderation-queries`` search index. Indexing is
        best-effort: a failure is logged and does not prevent the model
        instance from being returned.

        :param query_string: Query string containing the filter criteria.
        :param notes: Optional notes about the query.
        :param score: Score associated with the query.
        :param active: Whether the query is active.
        :returns: The newly created (uncommitted) ``ModerationQuery``.
        """
        query = cls(query_string=query_string, notes=notes, score=score, active=active)
        db.session.add(query)

        try:
            current_search_client.index(
                index="moderation-queries",
                body={
                    "query": {"query_string": {"query": query_string}},
                    "active": active,
                    "score": score,
                    "notes": notes,
                },
            )
        except Exception:
            # Keep the original best-effort semantics (do not re-raise), but
            # report through the application logger with the traceback
            # instead of printing to stdout.
            current_app.logger.exception(
                "Failed to index moderation query: %s", query_string
            )

        return query

    @classmethod
    def get(cls, query_id=None):
        """Retrieve a moderation query by ID or return all queries if no ID is provided.

        :param query_id: Primary key of the query to fetch, or ``None``.
        :returns: The matching query (or ``None`` if there is no match) when
            ``query_id`` is given; otherwise a list of all queries.
        """
        if query_id is not None:
            return cls.query.filter_by(id=query_id).one_or_none()
        return cls.query.all()

    def __repr__(self):
        """Get a string representation of the moderation query."""
        return (
            f"<ModerationQuery id={self.id}, score={self.score}, active={self.active}>"
        )
17 changes: 17 additions & 0 deletions site/zenodo_rdm/moderation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re

from flask import current_app
from invenio_search import current_search_client

from zenodo_rdm.moderation.proxies import current_domain_tree

Expand Down Expand Up @@ -123,3 +124,19 @@ def files_rule(identity, draft=None, record=None):
score += current_scores.ham_files

return score


def match_query_rule(identity, draft=None, record=None):
    """Calculate a score based on matched percolate queries against the record.

    The dumped record is percolated against the queries stored in the
    ``moderation-queries`` index, and the ``score`` values of all returned
    hits are summed.

    :param identity: Identity passed by the moderation framework (unused here).
    :param draft: Draft passed by the moderation framework (unused here).
    :param record: Record whose ``dumps()`` output is percolated.
    :returns: Sum of the scores of the matched queries; ``0`` when there is
        no record or nothing matched.
    """
    if record is None:
        # ``record`` defaults to None; without a document there is nothing
        # to percolate, so contribute a neutral score instead of raising
        # AttributeError on ``None.dumps()``.
        return 0

    matched_queries = current_search_client.search(
        index="moderation-queries",
        body={"query": {"percolate": {"field": "query", "document": record.dumps()}}},
    )

    # NOTE(review): no ``size`` is passed, so only the search backend's
    # default page of hits is scored -- confirm this cap is intended.
    # ``or 0`` also tolerates a stored ``score`` of null.
    return sum(
        hit["_source"].get("score") or 0 for hit in matched_queries["hits"]["hits"]
    )

0 comments on commit 857502d

Please sign in to comment.