Skip to content

Commit

Permalink
feat: implement manual triage view
Browse files Browse the repository at this point in the history
This view allows users to filter CVEs and packages, link them, and create a
NixpkgsIssue from their selection.

Model changes:
    - Added **SearchVectorField** to improve performance of full-text search.
    - Indices added for performance improvements:
        - **GinIndex** for SearchVectorFields.
        - **BTreeIndex** for fields to be searched for filtering and aggregation.
  • Loading branch information
alejandrosame committed Aug 9, 2024
1 parent 4192cc0 commit 8a4375b
Show file tree
Hide file tree
Showing 13 changed files with 1,247 additions and 34 deletions.
1 change: 1 addition & 0 deletions src/website/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ recursive-include webview/management *
recursive-include webview/migrations *
recursive-include webview/static *
recursive-include webview/templates *
recursive-include webview/templatetags *
recursive-include tracker/management *
recursive-include tracker/migrations *
recursive-include tracker/static *
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Generated by Django 4.2.7 on 2024-06-23 20:18

import django.contrib.postgres.indexes
import django.contrib.postgres.search
from django.contrib.postgres.search import SearchVector
from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations

from shared.models import AffectedProduct, Container, Cpe, Description


def update_search_vectors(apps, schema_editor):
    """Backfill ``search_vector`` for rows that predate this migration.

    The triggers installed by this migration only fire on INSERT or on UPDATE
    of the document columns, so rows that already exist would otherwise keep a
    NULL vector and never match a full-text query.

    Args:
        apps: Historical app registry supplied by ``RunPython``. Models are
            resolved through it so the UPDATE statements are built against the
            schema as of this migration, not against whatever
            ``shared.models`` declares when the migration is eventually run.
        schema_editor: Supplied by ``RunPython``; unused.
    """
    # Use the same text search configuration the triggers pass to
    # tsvector_update_trigger, so backfilled rows and trigger-maintained rows
    # are normalised identically regardless of default_text_search_config.
    config = "pg_catalog.english"

    # Model name -> document columns, mirroring the trigger definitions below.
    documents = {
        "Container": ("title",),
        "Description": ("value",),
        "AffectedProduct": ("vendor", "product", "package_name", "repo"),
        "Cpe": ("name",),
    }
    for model_name, columns in documents.items():
        model = apps.get_model("shared", model_name)
        model.objects.update(search_vector=SearchVector(*columns, config=config))

class Migration(migrations.Migration):
    """Add full-text search support to the CVE-related models.

    For each of AffectedProduct, Container, Cpe and Description this migration:
      1. adds a nullable ``search_vector`` column,
      2. adds a GIN index on that column,
      3. installs a BEFORE INSERT OR UPDATE trigger that recomputes the column
         from the model's document columns with ``tsvector_update_trigger``,
      4. backfills the column for rows that already exist.
    """

    dependencies = [
        ('shared', '0024_alter_nixlicense_unique_together_and_more'),
    ]

    operations = [
        # --- 1. Nullable tsvector columns -----------------------------------
        migrations.AddField(
            model_name='affectedproduct',
            name='search_vector',
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddField(
            model_name='container',
            name='search_vector',
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddField(
            model_name='cpe',
            name='search_vector',
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddField(
            model_name='description',
            name='search_vector',
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        # --- 2. GIN indexes on the new columns ------------------------------
        migrations.AddIndex(
            model_name='affectedproduct',
            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='shared_affe_search__aa6eef_gin'),
        ),
        migrations.AddIndex(
            model_name='container',
            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='shared_cont_search__979c85_gin'),
        ),
        migrations.AddIndex(
            model_name='cpe',
            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='shared_cpe_search__347dd9_gin'),
        ),
        migrations.AddIndex(
            model_name='description',
            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='shared_desc_search__dd1c6d_gin'),
        ),
        # --- 3. Triggers keeping search_vector in sync ----------------------
        # Each trigger runs BEFORE INSERT OR UPDATE OF the listed document
        # columns and uses the "pg_catalog.english" configuration. The hash
        # and pgid values are generated by pgtrigger; do not edit them by hand.
        pgtrigger.migrations.AddTrigger(
            model_name='affectedproduct',
            trigger=pgtrigger.compiler.Trigger(name='affected_search_vector', sql=pgtrigger.compiler.UpsertTriggerSql(execute='tsvector_update_trigger("search_vector", "pg_catalog.english", "vendor", "product", "package_name", "repo")', func='', hash='27c6a9539dba25b1c7641933f15dc17346a04b87', operation='INSERT OR UPDATE OF "vendor", "product", "package_name", "repo"', pgid='pgtrigger_affected_search_vector_25927', table='shared_affectedproduct', when='BEFORE')),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name='container',
            trigger=pgtrigger.compiler.Trigger(name='cve_container_search_vector', sql=pgtrigger.compiler.UpsertTriggerSql(execute='tsvector_update_trigger("search_vector", "pg_catalog.english", "title")', func='', hash='26d3e8e55e86b059c13aa7375180c06c7c3cfc5e', operation='INSERT OR UPDATE OF "title"', pgid='pgtrigger_cve_container_search_vector_85378', table='shared_container', when='BEFORE')),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name='cpe',
            trigger=pgtrigger.compiler.Trigger(name='cpe_search_vector_idx', sql=pgtrigger.compiler.UpsertTriggerSql(execute='tsvector_update_trigger("search_vector", "pg_catalog.english", "name")', func='', hash='5831e70cff7886047233ef2d3b870a320fa5d81d', operation='INSERT OR UPDATE OF "name"', pgid='pgtrigger_cpe_search_vector_idx_80861', table='shared_cpe', when='BEFORE')),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name='description',
            trigger=pgtrigger.compiler.Trigger(name='description_search_vector_idx', sql=pgtrigger.compiler.UpsertTriggerSql(execute='tsvector_update_trigger("search_vector", "pg_catalog.english", "value")', func='', hash='5b6196ba33f28ffdb9dcfff123f0ea2fe94588fd', operation='INSERT OR UPDATE OF "value"', pgid='pgtrigger_description_search_vector_idx_ce47a', table='shared_description', when='BEFORE')),
        ),
        # --- 4. Backfill ----------------------------------------------------
        # Populate the vectors for rows that already exist: the triggers above
        # only fire on INSERT or on UPDATE of the document columns. The
        # reverse step is a no-op.
        migrations.RunPython(
            update_search_vectors, reverse_code=migrations.RunPython.noop
        )
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2024-07-29 20:36

import django.contrib.postgres.indexes
from django.db import migrations


class Migration(migrations.Migration):
    """Add a B-tree index on ``NixDerivation.name``."""

    dependencies = [
        ('shared', '0025_affectedproduct_search_vector_and_more'),
    ]

    operations = [
        # Per the commit message, B-tree indexes cover fields that are
        # searched for filtering and aggregation.
        migrations.AddIndex(
            model_name='nixderivation',
            index=django.contrib.postgres.indexes.BTreeIndex(fields=['name'], name='shared_nixd_name_fd9eb7_btree'),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Generated by Django 4.2.7 on 2024-07-30 20:10

import django.contrib.postgres.indexes
import django.contrib.postgres.search
from django.contrib.postgres.search import SearchVector
from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations

from shared.models import NixDerivation, NixDerivationMeta

def update_search_vectors(apps, schema_editor):
    """Backfill ``search_vector`` for derivation rows that predate this migration.

    The triggers installed by this migration only fire on INSERT or on UPDATE
    of the document columns, so rows that already exist would otherwise keep a
    NULL vector and never match a full-text query.

    Args:
        apps: Historical app registry supplied by ``RunPython``. Models are
            resolved through it so the UPDATE statements are built against the
            schema as of this migration, not against whatever
            ``shared.models`` declares when the migration is eventually run.
        schema_editor: Supplied by ``RunPython``; unused.
    """
    # Use the same text search configuration the triggers pass to
    # tsvector_update_trigger, so backfilled rows and trigger-maintained rows
    # are normalised identically regardless of default_text_search_config.
    config = "pg_catalog.english"

    derivation = apps.get_model("shared", "NixDerivation")
    derivation_meta = apps.get_model("shared", "NixDerivationMeta")

    derivation.objects.update(
        search_vector=SearchVector("attribute", "name", config=config)
    )
    derivation_meta.objects.update(
        search_vector=SearchVector("description", config=config)
    )

class Migration(migrations.Migration):
    """Add full-text search support to NixDerivation and NixDerivationMeta.

    For both models this migration adds a nullable ``search_vector`` column, a
    GIN index on it, a BEFORE INSERT OR UPDATE trigger that recomputes it with
    ``tsvector_update_trigger``, and finally backfills existing rows.
    """

    dependencies = [
        ('shared', '0026_nixderivation_shared_nixd_name_fd9eb7_btree'),
    ]

    operations = [
        # --- 1. Nullable tsvector columns -----------------------------------
        migrations.AddField(
            model_name='nixderivation',
            name='search_vector',
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddField(
            model_name='nixderivationmeta',
            name='search_vector',
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        # --- 2. GIN indexes on the new columns ------------------------------
        migrations.AddIndex(
            model_name='nixderivation',
            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='shared_nixd_search__8fd81f_gin'),
        ),
        migrations.AddIndex(
            model_name='nixderivationmeta',
            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='shared_nixd_search__c5ba7b_gin'),
        ),
        # --- 3. Triggers keeping search_vector in sync ----------------------
        # Each trigger runs BEFORE INSERT OR UPDATE OF the listed document
        # columns and uses the "pg_catalog.english" configuration. The hash
        # and pgid values are generated by pgtrigger; do not edit them by hand.
        pgtrigger.migrations.AddTrigger(
            model_name='nixderivation',
            trigger=pgtrigger.compiler.Trigger(name='attribute_name_search_vector_idx', sql=pgtrigger.compiler.UpsertTriggerSql(execute='tsvector_update_trigger("search_vector", "pg_catalog.english", "attribute", "name")', func='', hash='8c736552457c8a45f333f2b43d93439bdadbd3cb', operation='INSERT OR UPDATE OF "attribute", "name"', pgid='pgtrigger_attribute_name_search_vector_idx_7899f', table='shared_nixderivation', when='BEFORE')),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name='nixderivationmeta',
            trigger=pgtrigger.compiler.Trigger(name='description_search_vector_idx', sql=pgtrigger.compiler.UpsertTriggerSql(execute='tsvector_update_trigger("search_vector", "pg_catalog.english", "description")', func='', hash='77a594a3693398d75d7297433c6c16d88fbc3267', operation='INSERT OR UPDATE OF "description"', pgid='pgtrigger_description_search_vector_idx_617b6', table='shared_nixderivationmeta', when='BEFORE')),
        ),
        # --- 4. Backfill ----------------------------------------------------
        # Populate the vectors for rows that already exist: the triggers above
        # only fire on INSERT or on UPDATE of the document columns. The
        # reverse step is a no-op.
        migrations.RunPython(
            update_search_vectors, reverse_code=migrations.RunPython.noop
        )
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2024-07-30 22:39

import django.contrib.postgres.indexes
from django.db import migrations


class Migration(migrations.Migration):
    """Add a B-tree index on ``CveRecord.cve_id``."""

    dependencies = [
        ('shared', '0027_nixderivation_search_vector_and_more'),
    ]

    operations = [
        # Per the commit message, B-tree indexes cover fields that are
        # searched for filtering and aggregation.
        migrations.AddIndex(
            model_name='cverecord',
            index=django.contrib.postgres.indexes.BTreeIndex(fields=['cve_id'], name='shared_cver_cve_id_7c6dc8_btree'),
        ),
    ]
82 changes: 82 additions & 0 deletions src/website/shared/models/cve.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from typing import Any

from django.contrib.postgres.indexes import BTreeIndex, GinIndex
from django.contrib.postgres.search import SearchVectorField
from django.core.validators import RegexValidator
from django.db import models
from django.db.models.signals import post_save
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from pgtrigger import UpdateSearchVector

from .nix_evaluation import NixDerivation

Expand Down Expand Up @@ -57,6 +60,11 @@ class RecordState(models.TextChoices):
def __str__(self) -> str:
    """Return this record's ``cve_id`` as its display string."""
    identifier = self.cve_id
    return identifier

class Meta:
    indexes = [
        # B-tree index on cve_id; per the commit message, B-tree indexes cover
        # fields that are searched for filtering and aggregation.
        BTreeIndex(fields=["cve_id"]),
    ]


class Product(models.Model):
vendor = models.CharField(max_length=512)
Expand Down Expand Up @@ -86,9 +94,27 @@ class Description(models.Model):
value = models.TextField()
media = models.ManyToManyField(SupportingMedia)

search_vector = SearchVectorField(null=True)

def __str__(self) -> str:
    """Return the first 32 characters of ``value`` followed by an ellipsis.

    The ellipsis is appended unconditionally, even when ``value`` is shorter
    than 32 characters.
    """
    preview = self.value[:32]
    return f"{preview}..."

class Meta:
    indexes = [
        # GIN index to speed up full-text queries against search_vector.
        GinIndex(fields=["search_vector"]),
    ]
    triggers = [
        # Database trigger that keeps search_vector up to date as rows change;
        # the vector is built from the fields listed in document_fields.
        UpdateSearchVector(
            name="description_search_vector_idx",
            vector_field="search_vector",
            document_fields=[
                "value",
            ],
        )
    ]


class Tag(models.Model):
"""Class representing a tag related to a CVE record."""
Expand Down Expand Up @@ -198,6 +224,22 @@ class Cpe(models.Model):
],
)

search_vector = SearchVectorField(null=True)

class Meta:
    indexes = [
        # GIN index to speed up full-text queries against search_vector.
        GinIndex(fields=["search_vector"]),
    ]
    triggers = [
        # Database trigger that keeps search_vector up to date as rows change;
        # the vector is built from the fields listed in document_fields.
        UpdateSearchVector(
            name="cpe_search_vector_idx",
            vector_field="search_vector",
            document_fields=["name"],
        )
    ]


class Module(models.Model):
name = models.CharField(max_length=4096)
Expand Down Expand Up @@ -232,6 +274,27 @@ class Status(models.TextChoices):
program_files = models.ManyToManyField(ProgramFile)
program_routines = models.ManyToManyField(ProgramRoutine)

search_vector = SearchVectorField(null=True)

class Meta:
    indexes = [
        # GIN index to speed up full-text queries against search_vector.
        GinIndex(fields=["search_vector"]),
    ]
    triggers = [
        # Database trigger that keeps search_vector up to date as rows change;
        # the vector is built from all four fields listed in document_fields.
        UpdateSearchVector(
            name="affected_search_vector",
            vector_field="search_vector",
            document_fields=[
                "vendor",
                "product",
                "package_name",
                "repo",
            ],
        )
    ]


class Container(models.Model):
"""Class representing a container (i.e. structured data) related to a CVE record."""
Expand Down Expand Up @@ -267,9 +330,28 @@ class Type(models.TextChoices):
credits = models.ManyToManyField(Credit)
source = models.JSONField(default=dict)

# Enable full-text search on CVE searches
search_vector = SearchVectorField(null=True)

def __str__(self) -> str:
    """Return the ``cve_id`` of the related ``cve`` object."""
    record = self.cve
    return record.cve_id

class Meta:
    indexes = [
        # GIN index to speed up full-text queries against search_vector.
        GinIndex(fields=["search_vector"]),
    ]
    triggers = [
        # Database trigger that keeps search_vector up to date as rows change;
        # the vector is built from the fields listed in document_fields.
        UpdateSearchVector(
            name="cve_container_search_vector",
            vector_field="search_vector",
            document_fields=[
                "title",
            ],
        )
    ]


###
#
Expand Down
41 changes: 41 additions & 0 deletions src/website/shared/models/nix_evaluation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from django.contrib.postgres import fields
from django.contrib.postgres.indexes import BTreeIndex, GinIndex
from django.contrib.postgres.search import SearchVectorField
from django.db import models
from django.utils.translation import gettext_lazy as _
from pgtrigger import UpdateSearchVector


def text_length(choices: type[models.TextChoices]) -> int:
Expand Down Expand Up @@ -110,9 +113,27 @@ class NixDerivationMeta(models.Model):

position = models.URLField(null=True)

search_vector = SearchVectorField(null=True)

def __str__(self) -> str:
    """Return the description, or an empty string when there is none.

    ``__str__`` must return a ``str``; returning ``None`` would make
    ``str(obj)`` raise ``TypeError``, so a missing description is rendered as
    the empty string instead.
    """
    return self.description or ""

class Meta:
    indexes = [
        # GIN index to speed up full-text queries against search_vector.
        GinIndex(fields=["search_vector"]),
    ]
    triggers = [
        # Database trigger that keeps search_vector up to date as rows change;
        # the vector is built from the fields listed in document_fields.
        UpdateSearchVector(
            name="description_search_vector_idx",
            vector_field="search_vector",
            document_fields=[
                "description",
            ],
        )
    ]


class NixOutput(models.Model):
"""
Expand Down Expand Up @@ -274,6 +295,26 @@ class NixDerivation(models.Model):
NixEvaluation, related_name="derivations", on_delete=models.CASCADE
)

search_vector = SearchVectorField(null=True)

def __str__(self) -> str:
    """Return the derivation name followed by a short prefix of its path hash.

    The hash is taken as the last ``/``-separated component of whatever
    precedes the first ``-`` in ``derivation_path``; only its first eight
    characters are shown.
    """
    before_dash, _, _ = self.derivation_path.partition("-")
    _, _, digest = before_dash.rpartition("/")
    return f"{self.name} {digest[:8]}"

class Meta:
    indexes = [
        # B-tree index on name; per the commit message, B-tree indexes cover
        # fields that are searched for filtering and aggregation.
        BTreeIndex(fields=["name"]),
        # GIN index to speed up full-text queries against search_vector.
        GinIndex(fields=["search_vector"]),
    ]

    triggers = [
        # Database trigger that keeps search_vector up to date as rows change;
        # the vector is built from both fields listed in document_fields.
        UpdateSearchVector(
            name="attribute_name_search_vector_idx",
            vector_field="search_vector",
            document_fields=[
                "attribute",
                "name",
            ],
        )
    ]
Loading

0 comments on commit 8a4375b

Please sign in to comment.