Skip to content

Commit

Permalink
add searchvector to commit table and index using GIN
Browse files Browse the repository at this point in the history
  • Loading branch information
Netacci committed Jan 24, 2025
1 parent d81aa5e commit ed99452
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 7 deletions.
1 change: 1 addition & 0 deletions treeherder/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
INSTALLED_APPS = [
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.postgres.search",
# Disable Django's own staticfiles handling in favour of WhiteNoise, for
# greater consistency between gunicorn and `./manage.py runserver`.
"whitenoise.runserver_nostatic",
Expand Down
24 changes: 24 additions & 0 deletions treeherder/model/migrations/0036_commit_search_vector_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 5.1.2 on 2025-01-24 07:42

import django.contrib.postgres.indexes
import django.contrib.postgres.search
from django.db import migrations


class Migration(migrations.Migration):
    """Add a GIN full-text-search index to the ``commit`` model.

    The index is an expression index over a ``SearchVector`` built from the
    ``revision``, ``author`` and ``comments`` fields with the ``english``
    text-search configuration, and is named ``search_vector_idx``.
    """

    # Runs after migration 0035 of the ``model`` app.
    dependencies = [
        ("model", "0035_bugscache_optional_bugzilla_ref"),
    ]

    operations = [
        # NOTE(review): AddIndex builds the index as part of this migration;
        # if the commit table is large, AddIndexConcurrently may be
        # preferable -- confirm against production table size.
        migrations.AddIndex(
            model_name="commit",
            index=django.contrib.postgres.indexes.GinIndex(
                # Same fields, order and config as the index declared on the
                # Commit model's Meta in treeherder/model/models.py.
                django.contrib.postgres.search.SearchVector(
                    "revision", "author", "comments", config="english"
                ),
                name="search_vector_idx",
            ),
        ),
    ]
12 changes: 8 additions & 4 deletions treeherder/model/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@

import newrelic.agent
from django.contrib.auth.models import User
from django.contrib.postgres.search import TrigramSimilarity
from django.contrib.postgres.indexes import GinIndex
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
from django.core.validators import MinLengthValidator
from django.db import models, transaction
from django.db.models import Count, Max, Min, Q, Subquery
from django.contrib.postgres.search import TrigramSimilarity, SearchVector
from django.db.utils import ProgrammingError
from django.forms import model_to_dict
from django.utils import timezone
Expand Down Expand Up @@ -188,11 +189,14 @@ class Commit(models.Model):
class Meta:
    # Model options: explicit table name, uniqueness rule and indexes.
    db_table = "commit"
    # The ("push", "revision") pair must be unique.
    unique_together = ("push", "revision")

    indexes = [
        # GIN expression index over a search vector of revision, author and
        # comments ("english" config). Migration
        # 0036_commit_search_vector_idx adds an index with this same
        # definition and name.
        GinIndex(
            SearchVector("revision", "author", "comments", config="english"),
            name="search_vector_idx",
        ),
    ]
def __str__(self):
    """Return the push's repository name and this revision, space-separated."""
    repository_name = self.push.repository.name
    return "{} {}".format(repository_name, self.revision)


class MachinePlatform(models.Model):
id = models.AutoField(primary_key=True)
os_name = models.CharField(max_length=25)
Expand Down
13 changes: 10 additions & 3 deletions treeherder/webapp/api/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from rest_framework.status import HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND

from treeherder.log_parser.failureline import get_group_results
from treeherder.model.models import Job, JobType, Push, Repository
from treeherder.model.models import Job, JobType, Push, Repository, Commit
from treeherder.push_health.builds import get_build_failures
from treeherder.push_health.compare import get_commit_history
from treeherder.push_health.linting import get_lint_failures
Expand All @@ -22,6 +22,7 @@
from treeherder.webapp.api.serializers import PushSerializer
from treeherder.webapp.api.utils import to_datetime, to_timestamp

from django.contrib.postgres.search import SearchVector, SearchQuery
logger = logging.getLogger(__name__)


Expand All @@ -42,7 +43,6 @@ def list(self, request, project):

# This will contain some meta data about the request and results
meta = {}

# support ranges for date as well as revisions(changes) like old tbpl
for param in [
"fromchange",
Expand All @@ -60,7 +60,6 @@ def list(self, request, project):
all_repos = request.query_params.get("all_repos")

pushes = Push.objects.order_by("-time")

if not all_repos:
try:
repository = Repository.objects.get(name=project)
Expand All @@ -71,6 +70,14 @@ def list(self, request, project):

pushes = pushes.filter(repository=repository)

search_param = filter_params.get("search")
if search_param:
filtered_commits = Commit.objects.annotate(
search=SearchVector("revision", "author", "comments", config="english")
).filter(
search=SearchQuery(search_param, config="english")
).values_list("push_id", flat=True)
pushes = pushes.filter(id__in=filtered_commits)
for param, value in meta.items():
if param == "fromchange":
revision_field = "revision__startswith" if len(value) < 40 else "revision"
Expand Down

0 comments on commit ed99452

Please sign in to comment.