Improve face recognition (#15205)

* Validate faces using cosine distance and SVC
* Formatting
* Use opencv instead of face embedding
* Update docs for training data
* Adjust to score system
* Set bounds
* remove face embeddings
* Update writing images
* Add face library page
* Add ability to select file
* Install opencv deps
* Cleanup
* Use different deps
* Move deps
* Cleanup
* Only show face library for desktop
* Implement deleting
* Add ability to upload image
* Add support for uploading images
blakeblackshear · Dec 1, 2024 · ac94c73 · ac94c73
1 parent 851aef4
commit ac94c73
Show file tree

Hide file tree

Showing 15 changed files with 397 additions and 137 deletions.
diff --git a/docker/main/install_deps.sh b/docker/main/install_deps.sh
@@ -16,7 +16,9 @@ apt-get -qq install --no-install-recommends -y \
     curl \
     lsof \
     jq \
-    nethogs
+    nethogs \
+    libgl1 \
+    libglib2.0-0
 
 # ensure python3 defaults to python3.9
 update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1

diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
@@ -34,8 +34,8 @@ unidecode == 1.3.*
 # Image Manipulation
 numpy == 1.26.*
 opencv-python-headless == 4.9.0.*
+opencv-contrib-python == 4.9.0.*
 scipy == 1.13.*
-scikit-learn == 1.5.*
 # OpenVino & ONNX
 openvino == 2024.3.*
 onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'

diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md
@@ -18,4 +18,18 @@ Face recognition is disabled by default and requires semantic search to be enabl
 ```yaml
 face_recognition:
   enabled: true
-```
+```
+
+## Dataset
+
+The number of images needed for a sufficient training set for face recognition varies depending on several factors:
+
+- Complexity of the task: A simple task like recognizing faces of known individuals may require fewer images than a complex task like identifying unknown individuals in a large crowd.
+- Diversity of the dataset: A dataset with diverse images, including variations in lighting, pose, and facial expressions, will require fewer images per person than a less diverse dataset.
+- Desired accuracy: The higher the desired accuracy, the more images are typically needed.
+
+However, here are some general guidelines:
+
+- Minimum: For basic face recognition tasks, a minimum of 10-20 images per person is often recommended.
+- Recommended: For more robust and accurate systems, 30-50 images per person are a good starting point.
+- Ideal: For optimal performance, especially in challenging conditions, 100 or more images per person can be beneficial.
diff --git a/frigate/api/classification.py b/frigate/api/classification.py
@@ -1,11 +1,14 @@
 """Object classification APIs."""
 
 import logging
+import os
 
 from fastapi import APIRouter, Request, UploadFile
 from fastapi.responses import JSONResponse
+from pathvalidate import sanitize_filename
 
 from frigate.api.defs.tags import Tags
+from frigate.const import FACE_DIR
 from frigate.embeddings import EmbeddingsContext
 
 logger = logging.getLogger(__name__)
@@ -15,20 +18,18 @@
 
 @router.get("/faces")
 def get_faces():
-    return JSONResponse(content={"message": "there are faces"})
+    face_dict: dict[str, list[str]] = {}
+
+    for name in os.listdir(FACE_DIR):
+        face_dict[name] = []
+        for file in os.listdir(os.path.join(FACE_DIR, name)):
+            face_dict[name].append(file)
+
+    return JSONResponse(status_code=200, content=face_dict)
 
 
 @router.post("/faces/{name}")
 async def register_face(request: Request, name: str, file: UploadFile):
-    # if not file.content_type.startswith("image"):
-    #    return JSONResponse(
-    #        status_code=400,
-    #        content={
-    #            "success": False,
-    #            "message": "Only an image can be used to register a face.",
-    #        },
-    #    )
-
     context: EmbeddingsContext = request.app.embeddings
     context.register_face(name, await file.read())
     return JSONResponse(
@@ -37,8 +38,8 @@ async def register_face(request: Request, name: str, file: UploadFile):
     )
 
 
-@router.delete("/faces")
-def deregister_faces(request: Request, body: dict = None):
+@router.post("/faces/{name}/delete")
+def deregister_faces(request: Request, name: str, body: dict = None):
     json: dict[str, any] = body or {}
     list_of_ids = json.get("ids", "")
 
@@ -49,7 +50,9 @@ def deregister_faces(request: Request, body: dict = None):
         )
 
     context: EmbeddingsContext = request.app.embeddings
-    context.delete_face_ids(list_of_ids)
+    context.delete_face_ids(
+        name, map(lambda file: sanitize_filename(file), list_of_ids)
+    )
     return JSONResponse(
         content=({"success": True, "message": "Successfully deleted faces."}),
         status_code=200,

diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py
@@ -24,7 +24,10 @@ class SemanticSearchConfig(FrigateBaseModel):
 class FaceRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable face recognition.")
     threshold: float = Field(
-        default=0.9, title="Face similarity score required to be considered a match."
+        default=170,
+        title="minimum face distance score required to be considered a match.",
+        gt=0.0,
+        le=1.0,
     )
     min_area: int = Field(
         default=500, title="Min area of face box to consider running face recognition."

diff --git a/frigate/db/sqlitevecq.py b/frigate/db/sqlitevecq.py
@@ -29,22 +29,15 @@ def delete_embeddings_description(self, event_ids: list[str]) -> None:
         ids = ",".join(["?" for _ in event_ids])
         self.execute_sql(f"DELETE FROM vec_descriptions WHERE id IN ({ids})", event_ids)
 
-    def delete_embeddings_face(self, face_ids: list[str]) -> None:
-        ids = ",".join(["?" for _ in face_ids])
-        self.execute_sql(f"DELETE FROM vec_faces WHERE id IN ({ids})", face_ids)
-
     def drop_embeddings_tables(self) -> None:
         self.execute_sql("""
             DROP TABLE vec_descriptions;
         """)
         self.execute_sql("""
             DROP TABLE vec_thumbnails;
         """)
-        self.execute_sql("""
-            DROP TABLE vec_faces;
-        """)
 
-    def create_embeddings_tables(self, face_recognition: bool) -> None:
+    def create_embeddings_tables(self) -> None:
         """Create vec0 virtual table for embeddings"""
         self.execute_sql("""
             CREATE VIRTUAL TABLE IF NOT EXISTS vec_thumbnails USING vec0(
@@ -58,11 +51,3 @@ def create_embeddings_tables(self, face_recognition: bool) -> None:
                 description_embedding FLOAT[768] distance_metric=cosine
             );
         """)
-
-        if face_recognition:
-            self.execute_sql("""
-                CREATE VIRTUAL TABLE IF NOT EXISTS vec_faces USING vec0(
-                    id TEXT PRIMARY KEY,
-                    face_embedding FLOAT[512] distance_metric=cosine
-                );
-            """)
diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py
@@ -14,7 +14,7 @@
 
 from frigate.comms.embeddings_updater import EmbeddingsRequestEnum, EmbeddingsRequestor
 from frigate.config import FrigateConfig
-from frigate.const import CONFIG_DIR
+from frigate.const import CONFIG_DIR, FACE_DIR
 from frigate.db.sqlitevecq import SqliteVecQueueDatabase
 from frigate.models import Event
 from frigate.util.builtin import serialize
@@ -209,8 +209,13 @@ def get_face_ids(self, name: str) -> list[str]:
 
         return self.db.execute_sql(sql_query).fetchall()
 
-    def delete_face_ids(self, ids: list[str]) -> None:
-        self.db.delete_embeddings_face(ids)
+    def delete_face_ids(self, face: str, ids: list[str]) -> None:
+        folder = os.path.join(FACE_DIR, face)
+        for id in ids:
+            file_path = os.path.join(folder, id)
+
+            if os.path.isfile(file_path):
+                os.unlink(file_path)
 
     def update_description(self, event_id: str, description: str) -> None:
         self.requestor.send_data(

diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
@@ -3,8 +3,6 @@
 import base64
 import logging
 import os
-import random
-import string
 import time
 
 from numpy import ndarray
@@ -14,7 +12,6 @@
 from frigate.config import FrigateConfig
 from frigate.const import (
     CONFIG_DIR,
-    FACE_DIR,
     UPDATE_EMBEDDINGS_REINDEX_PROGRESS,
     UPDATE_MODEL_STATE,
 )
@@ -68,7 +65,7 @@ def __init__(self, config: FrigateConfig, db: SqliteVecQueueDatabase) -> None:
         self.requestor = InterProcessRequestor()
 
         # Create tables if they don't exist
-        self.db.create_embeddings_tables(self.config.face_recognition.enabled)
+        self.db.create_embeddings_tables()
 
         models = [
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
@@ -126,22 +123,6 @@ def __init__(self, config: FrigateConfig, db: SqliteVecQueueDatabase) -> None:
             device="GPU" if config.semantic_search.model_size == "large" else "CPU",
         )
 
-        self.face_embedding = None
-
-        if self.config.face_recognition.enabled:
-            self.face_embedding = GenericONNXEmbedding(
-                model_name="facenet",
-                model_file="facenet.onnx",
-                download_urls={
-                    "facenet.onnx": "https://github.com/NickM-27/facenet-onnx/releases/download/v1.0/facenet.onnx",
-                    "facedet.onnx": "https://github.com/opencv/opencv_zoo/raw/refs/heads/main/models/face_detection_yunet/face_detection_yunet_2023mar_int8.onnx",
-                },
-                model_size="large",
-                model_type=ModelTypeEnum.face,
-                requestor=self.requestor,
-                device="GPU",
-            )
-
         self.lpr_detection_model = None
         self.lpr_classification_model = None
         self.lpr_recognition_model = None
@@ -277,40 +258,12 @@ def batch_embed_description(
 
         return embeddings
 
-    def embed_face(self, label: str, thumbnail: bytes, upsert: bool = False) -> ndarray:
-        embedding = self.face_embedding(thumbnail)[0]
-
-        if upsert:
-            rand_id = "".join(
-                random.choices(string.ascii_lowercase + string.digits, k=6)
-            )
-            id = f"{label}-{rand_id}"
-
-            # write face to library
-            folder = os.path.join(FACE_DIR, label)
-            file = os.path.join(folder, f"{id}.webp")
-            os.makedirs(folder, exist_ok=True)
-
-            # save face image
-            with open(file, "wb") as output:
-                output.write(thumbnail)
-
-            self.db.execute_sql(
-                """
-                INSERT OR REPLACE INTO vec_faces(id, face_embedding)
-                VALUES(?, ?)
-                """,
-                (id, serialize(embedding)),
-            )
-
-        return embedding
-
     def reindex(self) -> None:
         logger.info("Indexing tracked object embeddings...")
 
         self.db.drop_embeddings_tables()
         logger.debug("Dropped embeddings tables.")
-        self.db.create_embeddings_tables(self.config.face_recognition.enabled)
+        self.db.create_embeddings_tables()
         logger.debug("Created embeddings tables.")
 
         # Delete the saved stats file

diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py
@@ -3,7 +3,9 @@
 import base64
 import logging
 import os
+import random
 import re
+import string
 import threading
 from multiprocessing.synchronize import Event as MpEvent
 from typing import Optional
@@ -22,7 +24,12 @@
 from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config import FrigateConfig
-from frigate.const import CLIPS_DIR, FRIGATE_LOCALHOST, UPDATE_EVENT_DESCRIPTION
+from frigate.const import (
+    CLIPS_DIR,
+    FACE_DIR,
+    FRIGATE_LOCALHOST,
+    UPDATE_EVENT_DESCRIPTION,
+)
 from frigate.embeddings.lpr.lpr import LicensePlateRecognition
 from frigate.events.types import EventTypeEnum
 from frigate.genai import get_genai_client
@@ -69,7 +76,9 @@ def __init__(
         self.requires_face_detection = "face" not in self.config.objects.all_objects
         self.detected_faces: dict[str, float] = {}
         self.face_classifier = (
-            FaceClassificationModel(db) if self.face_recognition_enabled else None
+            FaceClassificationModel(self.config.face_recognition, db)
+            if self.face_recognition_enabled
+            else None
         )
 
         # create communication for updating event descriptions
@@ -144,12 +153,14 @@ def _handle_request(topic: str, data: dict[str, any]) -> str:
                     if not self.face_recognition_enabled:
                         return False
 
+                    rand_id = "".join(
+                        random.choices(string.ascii_lowercase + string.digits, k=6)
+                    )
+                    label = data["face_name"]
+                    id = f"{label}-{rand_id}"
+
                     if data.get("cropped"):
-                        self.embeddings.embed_face(
-                            data["face_name"],
-                            base64.b64decode(data["image"]),
-                            upsert=True,
-                        )
+                        pass
                     else:
                         img = cv2.imdecode(
                             np.frombuffer(
@@ -163,12 +174,18 @@ def _handle_request(topic: str, data: dict[str, any]) -> str:
                             return False
 
                         face = img[face_box[1] : face_box[3], face_box[0] : face_box[2]]
-                        ret, webp = cv2.imencode(
+                        ret, thumbnail = cv2.imencode(
                             ".webp", face, [int(cv2.IMWRITE_WEBP_QUALITY), 100]
                         )
-                        self.embeddings.embed_face(
-                            data["face_name"], webp.tobytes(), upsert=True
-                        )
+
+                    # write face to library
+                    folder = os.path.join(FACE_DIR, label)
+                    file = os.path.join(folder, f"{id}.webp")
+                    os.makedirs(folder, exist_ok=True)
+
+                    # save face image
+                    with open(file, "wb") as output:
+                        output.write(thumbnail.tobytes())
 
                 self.face_classifier.clear_classifier()
                 return True
@@ -201,7 +218,9 @@ def _process_updates(self) -> None:
 
         # Create our own thumbnail based on the bounding box and the frame time
         try:
-            yuv_frame = self.frame_manager.get(frame_name, camera_config.frame_shape_yuv)
+            yuv_frame = self.frame_manager.get(
+                frame_name, camera_config.frame_shape_yuv
+            )
         except FileNotFoundError:
             pass
 
@@ -447,16 +466,7 @@ def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
                 ),
             ]
 
-        ret, webp = cv2.imencode(
-            ".webp", face_frame, [int(cv2.IMWRITE_WEBP_QUALITY), 100]
-        )
-
-        if not ret:
-            logger.debug("Not processing face due to error creating cropped image.")
-            return
-
-        embedding = self.embeddings.embed_face("unknown", webp.tobytes(), upsert=False)
-        res = self.face_classifier.classify_face(embedding)
+        res = self.face_classifier.classify_face(face_frame)
 
         if not res:
             return
@@ -467,11 +477,9 @@ def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
             f"Detected best face for person as: {sub_label} with score {score}"
         )
 
-        if score < self.config.face_recognition.threshold or (
-            id in self.detected_faces and score <= self.detected_faces[id]
-        ):
+        if id in self.detected_faces and score <= self.detected_faces[id]:
             logger.debug(
-                f"Recognized face score {score} is less than threshold ({self.config.face_recognition.threshold}) / previous face score ({self.detected_faces.get(id)})."
+                f"Recognized face distance {score} is less than previous face distance ({self.detected_faces.get(id)})."
             )
             return