SuffolkLITLab · nonprofittechy · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025
diff --git a/docassemble/GithubFeedbackForm/data/questions/feedback.yml b/docassemble/GithubFeedbackForm/data/questions/feedback.yml
@@ -311,26 +311,33 @@ need:
   - package_version
   - filename
 code: |
-  if not task_performed('issue noted', persistent=True):
-    saved_uuid
-    if showifdef('would_be_on_panel', False):
-      add_panel_participant(panel_email)
-    if should_send_to_github:
-      issue_url
-      if issue_url:
-        if saved_uuid:
-          set_feedback_github_url(saved_uuid, issue_url)
-      else:
-        al_error_email
-        log(f"This form was not able to add an issue on the {github_user}/{github_repo} repo. Check your config.")
-        if al_error_email and not is_likely_spam(issue_template.content):
-          log(f"Unable to create issue on repo {github_repo}, falling back to emailing {al_error_email}")
-          send_email(to=al_error_email, subject=f"{github_repo} - {issue_template.subject_as_html(trim=True)}", template=issue_template)
-        else:
-          log(f"~~~USER FEEDBACK~~~ {github_repo} -{issue_template.subject_as_html(trim=True)} - {issue_template.content_as_html(trim=True)}")
+  if is_likely_spam(issue_template.content):
+    log("Not saving feedback because it looks like spam")
     mark_task_as_performed('issue noted', persistent=True)
+    issue_url = None
+    saved_uuid = None
+    note_issue = False  # NOTE(review): overwritten by the unconditional `note_issue = True` at the end of this block -- confirm intent
   else:
-    log("Already sent feedback to github from a feedback interview, not going to send again")
+    if not task_performed('issue noted', persistent=True):
+      saved_uuid
+      if showifdef('would_be_on_panel', False):
+        add_panel_participant(panel_email)
+      if should_send_to_github:
+        issue_url
+        if issue_url:
+          if saved_uuid:
+            set_feedback_github_url(saved_uuid, issue_url)
+        else:
+          al_error_email
+          log(f"This form was not able to add an issue on the {github_user}/{github_repo} repo. Check your config.")
+          if al_error_email:  # spam was already ruled out by the outer check; do not run the spam check a second time
+            log(f"Unable to create issue on repo {github_repo}, falling back to emailing {al_error_email}")
+            send_email(to=al_error_email, subject=f"{github_repo} - {issue_template.subject_as_html(trim=True)}", template=issue_template)
+          else:
+            log(f"~~~USER FEEDBACK~~~ {github_repo} -{issue_template.subject_as_html(trim=True)} - {issue_template.content_as_html(trim=True)}")
+      mark_task_as_performed('issue noted', persistent=True)  # recorded whether or not the GitHub/email delivery succeeded
+    else:
+      log("Already sent feedback to github from a feedback interview, not going to send again")
   note_issue = True
 ---
 code: |

diff --git a/docassemble/GithubFeedbackForm/github_issue.py b/docassemble/GithubFeedbackForm/github_issue.py
@@ -6,6 +6,11 @@
 from docassemble.base.util import log, get_config, interview_url
 import re
 
+try:
+    import google.generativeai as genai
+except ImportError:  # a bare `except:` would also swallow SystemExit/KeyboardInterrupt
+    pass
+
 # reference: https://gist.github.com/JeffPaine/3145490
 # https://docs.github.com/en/free-pro-team@latest/rest/reference/issues#create-an-issue
 
@@ -16,6 +21,7 @@
     "make_github_issue",
     "feedback_link",
     "is_likely_spam",
+    "is_likely_spam_from_genai",
     "prefill_github_issue_url",
 ]
 USERNAME = get_config("github issues", {}).get("username")
@@ -168,8 +174,72 @@ def feedback_link(
     )
 
 
+def is_likely_spam_from_genai(
+    body: Optional[str],
+    context: Optional[str] = None,
+    gemini_api_key: Optional[str] = None,
+    model: Optional[str] = "gemini-2.0-flash-exp",
+) -> bool:
+    """
+    Check if the body of the issue is likely spam with the help of Google Gemini.
+
+    Fails open: returns False (not spam) when there is no body, no API key, or on any Gemini error.
+
+    Args:
+        body (Optional[str]): the body of the issue
+        context (Optional[str]): the context of the issue to help rate it as spam or not, defaults to a guided interview in the legal context
+        gemini_api_key (Optional[str]): the Google Gemini API key, falls back to the global config key `google gemini api key`
+        model (Optional[str]): the model name; when falsy (e.g. None), it is read from the global config
+            as `github issues: spam model`, falling back to "gemini-2.0-flash-exp"
+
+    Returns:
+        bool: True only if the model's answer, ignoring surrounding quotes, a trailing period and case, is exactly "spam"
+    """
+    if not body:
+        return False
+
+    context = context or "a guided interview in the legal context"
+    gemini_api_key = gemini_api_key or get_config("google gemini api key")
+    if not gemini_api_key:
+        log("Not using Google Gemini Flash to check for spam: no token provided")
+        return False
+
+    if not model:
+        model = get_config("github issues", {}).get(
+            "spam model", "gemini-2.0-flash-exp"
+        )
+
+    try:
+        genai.configure(api_key=gemini_api_key)
+        classifier = genai.GenerativeModel(
+            model_name=model,
+            system_instruction=f"""
+                You are reviewing a feedback form for {context}. Your job is to allow as many
+                relevant feedback responses as possible while filtering out irrelevant and spam feedback,
+                especially targeted advertising that isn't pointing out a problem on the guided interview.
+
+                Rate the user's feedback as 'spam' or 'not spam' based on the context of the guided interview.
+                Answer only with the exact keywords: 'spam' or 'not spam'.
+                """,
+        )
+    except Exception as e:  # also covers the NameError raised when the genai import failed
+        log(f"Error configuring Google Gemini Flash: {e}")
+        return False
+
+    try:
+        # Tolerate quoting, a trailing period and capitalization in the model's answer
+        verdict = classifier.generate_content(body).text.strip().strip("'\".").lower()
+    except Exception as e:
+        log(f"Error using Google Gemini Flash: {e}")
+        return False
+    return verdict == "spam"
+
+
 def is_likely_spam(
-    body: Optional[str], keywords: Optional[List[str]] = None, filter_urls: bool = True
+    body: Optional[str],
+    keywords: Optional[List[str]] = None,
+    filter_urls: bool = True,
+    model: Optional[str] = None,
 ) -> bool:
     """
     Check if the body of the issue is likely spam based on a set of keywords and URLs.
@@ -179,9 +249,10 @@ def is_likely_spam(
 
     Args:
         body (Optional[str]): the body of the issue
-        keywords (Optional[List[str]]): a list of keywords that are likely spam, defaults to a set of keywords
+        keywords (Optional[List[str]]): a list of additional keywords that are likely spam, defaults to a set of keywords
             from the global configuration under the `github issues: spam keywords` key
     """
+
     _urls = ["leadgeneration.com", "leadmagnet.com"]
     _keywords = [
         "100 times more effective",
@@ -244,7 +315,7 @@ def is_likely_spam(
         if re.search(url_regex, body):
             return True
 
-    return False
+    return is_likely_spam_from_genai(body, model=model)
 
 
 def prefill_github_issue_url(

diff --git a/pyproject.toml b/pyproject.toml
@@ -10,3 +10,7 @@ exclude = '''(?x)(
 [[tool.mypy.overrides]]
 module = "docassemble.base.*"
 ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "google.*"
+ignore_missing_imports = true
diff --git a/setup.py b/setup.py
@@ -53,7 +53,7 @@ def find_package_data(where='.', package='', exclude=standard_exclude, exclude_d
       url='https://courtformsonline.org',
       packages=find_packages(),
       namespace_packages=['docassemble'],
-      install_requires=['docassemble.ALToolbox>=0.6.0'],
+      install_requires=['docassemble.ALToolbox>=0.6.0', 'google-generativeai'],
       zip_safe=False,
       package_data=find_package_data(where='docassemble/GithubFeedbackForm/', package='docassemble.GithubFeedbackForm'),
      )