Run tests against SQLite and Postgres on CI #2454

Closed
wants to merge 15 commits
98 changes: 78 additions & 20 deletions backend/code_review_backend/issues/api.py
@@ -6,7 +6,7 @@
from datetime import date, datetime, timedelta

from django.conf import settings
from django.db.models import Count, Prefetch, Q
from django.db.models import BooleanField, Count, ExpressionWrapper, Prefetch, Q
from django.db.models.functions import TruncDate
from django.shortcuts import get_object_or_404
from django.urls import path
@@ -20,6 +20,7 @@
LEVEL_ERROR,
Diff,
Issue,
IssueLink,
Repository,
Revision,
)
@@ -121,19 +122,31 @@ def get_queryset(self):
# Because of the perf. hit, filter issues that are not older than today - 3 months.
.filter(created__gte=date.today() - timedelta(days=90))
.prefetch_related(
"issues",
Prefetch(
"issue_links",
queryset=IssueLink.objects.select_related("issue"),
),
"revision",
"revision__base_repository",
"revision__head_repository",
"repository",
)
.annotate(nb_issues=Count("issues"))
.annotate(nb_errors=Count("issues", filter=Q(issues__level="error")))
.annotate(nb_warnings=Count("issues", filter=Q(issues__level="warning")))
.annotate(nb_issues=Count("issue_links"))
.annotate(
nb_errors=Count(
"issue_links", filter=Q(issue_links__issue__level=LEVEL_ERROR)
)
)
.annotate(
nb_warnings=Count(
"issue_links", filter=Q(issue_links__issue__level="warning")
)
)
.annotate(
nb_issues_publishable=Count(
"issues",
filter=Q(issues__in_patch=True) | Q(issues__level=LEVEL_ERROR),
"issue_links",
filter=Q(issue_links__in_patch=True)
| Q(issue_links__issue__level=LEVEL_ERROR),
)
)
.order_by("-id")
@@ -190,9 +203,25 @@ def get_queryset(self):
if not self.kwargs.get("diff_id"):
return Issue.objects.none()
diff = get_object_or_404(Diff, id=self.kwargs["diff_id"])
# No multiple revision should be linked to a single diff
# but we use the distinct clause to match the DB state.
return Issue.objects.filter(diffs=diff).distinct()
return (
Issue.objects.filter(issue_links__diff=diff)
.annotate(publishable=Q(issue_links__in_patch=True) & Q(level=LEVEL_ERROR))
.values(
"id",
"hash",
"analyzer",
"analyzer_check",
"path",
"line",
"nb_lines",
"char",
"level",
"message",
"publishable",
"issue_links__in_patch",
"issue_links__new_for_revision",
)
)
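
Since the endpoint now goes through .values(), each result is a plain dict whose keys follow the lookups above, including the double-underscore ones. An illustrative element (all values made up):

# Illustrative only: one element of the queryset returned above.
{
    "id": 1,
    "hash": "d41d8cd9",
    "analyzer": "clang-tidy",
    "analyzer_check": "example-check",   # hypothetical check name
    "path": "dom/example.cpp",           # hypothetical path
    "line": 42,
    "nb_lines": 1,
    "char": None,
    "level": "error",
    "message": "example message",
    "publishable": True,
    "issue_links__in_patch": True,
    "issue_links__new_for_revision": False,
}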


class IssueBulkCreate(generics.CreateAPIView):
@@ -212,7 +241,7 @@ def get_serializer_context(self):
return context


class IssueCheckDetails(CachedView, generics.ListAPIView):
class IssueCheckDetails(generics.ListAPIView):
"""
List all the issues found by a specific analyzer check in a repository
"""
@@ -223,24 +252,27 @@ def get_queryset(self):
repo = self.kwargs["repository"]

queryset = (
Issue.objects.filter(revisions__head_repository__slug=repo)
Issue.objects.filter(issue_links__revision__head_repository__slug=repo)
.filter(analyzer=self.kwargs["analyzer"])
.filter(analyzer_check=self.kwargs["check"])
.annotate(publishable=Q(issue_links__in_patch=True) & Q(level=LEVEL_ERROR))
.prefetch_related(
"diffs__repository",
"issue_links__diff__repository",
Prefetch(
"diffs__revision",
"issue_links__diff__revision",
queryset=Revision.objects.select_related(
"base_repository", "head_repository"
),
),
)
# Prefetch the list of diffs for each link of this issue
.prefetch_related("diffs")
.order_by("-created")
)

# Display only publishable issues by default
publishable = self.request.query_params.get("publishable", "true").lower()
_filter = Q(in_patch=True) | Q(level=LEVEL_ERROR)
_filter = Q(issue_links__in_patch=True) | Q(level=LEVEL_ERROR)
if publishable == "true":
queryset = queryset.filter(_filter)
elif publishable == "false":
@@ -271,14 +303,27 @@ class IssueCheckStats(CachedView, generics.ListAPIView):
def get_queryset(self):
queryset = (
Issue.objects.values(
"revisions__head_repository__slug", "analyzer", "analyzer_check"
"issue_links__revision__head_repository__slug",
"analyzer",
"analyzer_check",
)
# We want to count distinct issues because they can be referenced on multiple diffs
.annotate(total=Count("id", distinct=True))
.annotate(
publishable=Count("id", filter=Q(in_patch=True) | Q(level=LEVEL_ERROR))
has_check=ExpressionWrapper(
Q(analyzer_check__isnull=True), output_field=BooleanField()
)
)
.annotate(
publishable=Count(
"id", filter=Q(issue_links__in_patch=True) | Q(level=LEVEL_ERROR)
)
)
.distinct(
"issue_links__revision__head_repository__slug",
"analyzer",
"analyzer_check",
)
.distinct("revisions__head_repository__slug", "analyzer", "analyzer_check")
)

# Filter issues by date
@@ -292,10 +337,23 @@ def get_queryset(self):
# Because of the perf. hit, filter issues that are not older than today - 3 months.
since = date.today() - timedelta(days=90)

queryset = queryset.filter(revisions__created__gte=since).distinct()
queryset = queryset.filter(issue_links__revision__created__gte=since).distinct()

return queryset.order_by(
"-total", "revisions__head_repository__slug", "analyzer", "analyzer_check"
"-total",
"issue_links__revision__head_repository__slug",
"analyzer",
# Use the same order as PostgreSQL when running on SQLite
"has_check",
"analyzer_check",
).values(
"issue_links__revision__head_repository__slug",
"analyzer",
"analyzer_check",
"total",
"publishable",
"issue_links__in_patch",
"issue_links__new_for_revision",
)
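
The has_check annotation only exists to make the ordering deterministic across databases: PostgreSQL sorts NULLs last in ascending order while SQLite sorts them first, so sorting on an explicit boolean before analyzer_check keeps both backends in agreement. A standalone sketch of the same idea, reusing the names from the queryset above:

# Hedged sketch, not part of the patch: sort NULL analyzer_check values
# consistently on PostgreSQL and SQLite via an explicit boolean sort key.
from django.db.models import BooleanField, ExpressionWrapper, Q

from code_review_backend.issues.models import Issue

checks = Issue.objects.annotate(
    has_check=ExpressionWrapper(
        Q(analyzer_check__isnull=True), output_field=BooleanField()
    )
).order_by("analyzer", "has_check", "analyzer_check")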


@@ -11,7 +11,7 @@
from parsepatch.patch import Patch

from code_review_backend.app.settings import BACKEND_USER_AGENT
from code_review_backend.issues.models import Diff, Issue
from code_review_backend.issues.models import Diff, IssueLink

logging.basicConfig(level=logging.INFO)

@@ -47,35 +47,41 @@ def load_hgmo_patch(diff):
return lines


def detect_in_patch(issue, lines):
def detect_in_patch(issue_link, lines):
"""From the code-review bot revisions.py contains() method"""
modified_lines = lines.get(issue.path)
modified_lines = lines.get(issue_link.issue.path)

if modified_lines is None:
# File not in patch
issue.in_patch = False
issue_link.in_patch = False

elif issue.line is None:
elif issue_link.issue.line is None:
# Empty line means full file
issue.in_patch = True
issue_link.in_patch = True

else:
# Detect if this issue is in the patch
chunk_lines = set(range(issue.line, issue.line + issue.nb_lines))
issue.in_patch = not chunk_lines.isdisjoint(modified_lines)
return issue
chunk_lines = set(
range(
issue_link.issue.line, issue_link.issue.line + issue_link.issue.nb_lines
)
)
issue_link.in_patch = not chunk_lines.isdisjoint(modified_lines)
return issue_link
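
A worked example of the overlap test above (numbers are illustrative, not from the patch): an issue starting at line 10 and spanning 3 lines is flagged in-patch as soon as any of lines 10, 11 or 12 is among the file's modified lines.

# Illustrative only: the set-intersection check used by detect_in_patch.
modified_lines = {8, 9, 12, 40}        # lines touched by the patch for this file
chunk_lines = set(range(10, 10 + 3))   # issue.line == 10, issue.nb_lines == 3
assert not chunk_lines.isdisjoint(modified_lines)  # line 12 overlaps, so in_patch is True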


def process_diff(diff: Diff):
"""This function needs to be on the top level in order to be usable by the pool"""
try:
lines = load_hgmo_patch(diff)

issues = [detect_in_patch(issue, lines) for issue in diff.issues.all()]
issue_links = [
detect_in_patch(issue_link, lines) for issue_link in diff.issue_links.all()
]
logging.info(
f"Found {len([i for i in issues if i.in_patch])} issues in patch for {diff.id}"
f"Found {len([i for i in issue_links if i.in_patch])} issue link in patch for {diff.id}"
)
Issue.objects.bulk_update(issues, ["in_patch"])
IssueLink.objects.bulk_update(issue_links, ["in_patch"])
except Exception as e:
logging.info(f"Failure on diff {diff.id}: {e}")

@@ -94,7 +100,9 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
# Only apply to diffs with issue links that are not already processed
diffs = (
Diff.objects.filter(issues__in_patch__isnull=True).order_by("id").distinct()
Diff.objects.filter(issue_links__in_patch__isnull=True)
.order_by("id")
.distinct()
)
logger.debug(f"Will process {diffs.count()} diffs")

@@ -72,30 +72,36 @@ def save_issues(self, diff, issues):

# Build all issues for that diff, in a single DB call
created_issues = Issue.objects.bulk_create(
Issue(
path=i["path"],
line=i["line"],
nb_lines=i.get("nb_lines", 1),
char=i.get("char"),
level=i.get("level", "warning"),
analyzer_check=i.get("kind") or i.get("check"),
message=i.get("message"),
analyzer=i["analyzer"],
hash=i["hash"],
new_for_revision=detect_new_for_revision(
diff, path=i["path"], hash=i["hash"]
),
)
for i in issues
if i["hash"]
[
Issue(
path=i["path"],
line=i["line"],
nb_lines=i.get("nb_lines", 1),
char=i.get("char"),
level=i.get("level", "warning"),
analyzer_check=i.get("kind") or i.get("check"),
message=i.get("message"),
analyzer=i["analyzer"],
hash=i["hash"],
)
for i in issues
if i["hash"]
],
ignore_conflicts=True,
)
IssueLink.objects.bulk_create(
IssueLink(
issue=i,
diff=diff,
revision_id=diff.revision_id,
)
for i in created_issues
[
IssueLink(
issue=i,
diff=diff,
revision_id=diff.revision_id,
new_for_revision=detect_new_for_revision(
diff, path=i.path, hash=i.hash
),
)
for i in created_issues
],
ignore_conflicts=True,
)
return created_issues

@@ -0,0 +1,51 @@
# Generated by Django 5.1.1 on 2024-10-14 13:05

from django.db import migrations, models
from django.db.models import OuterRef, Subquery


def move_attributes(apps, schema_editor):
"""
Update values for new attributes on IssueLink
using source data from linked Issue
"""
IssueLink = apps.get_model("issues", "IssueLink")
Issue = apps.get_model("issues", "Issue")

IssueLink.objects.update(
in_patch=Subquery(
Issue.objects.filter(id=OuterRef("issue_id")).values("in_patch")[:1]
),
new_for_revision=Subquery(
Issue.objects.filter(id=OuterRef("issue_id")).values("new_for_revision")[:1]
),
)


class Migration(migrations.Migration):
dependencies = [
("issues", "0011_indexes_revision_issue_path"),
]

operations = [
# Move in-patch & new_for_revision to IssueLink, propagating their state
migrations.AddField(
model_name="issuelink",
name="in_patch",
field=models.BooleanField(null=True),
),
migrations.AddField(
model_name="issuelink",
name="new_for_revision",
field=models.BooleanField(null=True),
),
migrations.RunPython(move_attributes),
migrations.RemoveField(
model_name="issue",
name="in_patch",
),
migrations.RemoveField(
model_name="issue",
name="new_for_revision",
),
]
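
The RunPython operation above is forward-only; a hedged sketch of a reverse function (not part of this patch) that would copy the values back onto Issue if the migration ever had to be unapplied:

def restore_attributes(apps, schema_editor):
    """Hypothetical reverse of move_attributes: copy values back to Issue."""
    IssueLink = apps.get_model("issues", "IssueLink")
    Issue = apps.get_model("issues", "Issue")

    Issue.objects.update(
        in_patch=Subquery(
            IssueLink.objects.filter(issue_id=OuterRef("id")).values("in_patch")[:1]
        ),
        new_for_revision=Subquery(
            IssueLink.objects.filter(issue_id=OuterRef("id")).values(
                "new_for_revision"
            )[:1]
        ),
    )

# It would then be registered as:
#     migrations.RunPython(move_attributes, restore_attributes)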