From cdb397feba38e1ccfcb44a0ac8b3971e275ea1e0 Mon Sep 17 00:00:00 2001 From: David Manthey Date: Mon, 25 Nov 2024 10:54:34 -0500 Subject: [PATCH] This uses the orjson library to serialize annotations. The orjson library is substantially faster than the built-in Python library at the expense of being less flexible. Here, we strip out the access control dictionaries from each annotation, convert the mongo id to a string, then use orjson to serialize. Further, rather than load the entire set of annotations into memory and serialize them in one shot, we stream them from the database and serialize them individually. When there are enough of them for efficiency, we yield them to the output function. This allows the data to start being returned before the database finishes, and, if the connection is interrupted, to stop pulling from the database when it is no longer useful. orjson is included as part of large_image, so we don't need to add it to the dependencies. --- .../server/api/annotation.py | 49 +++++++++++++++---- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/devops/girder/plugins/AnnotationPlugin/upenncontrast_annotation/server/api/annotation.py b/devops/girder/plugins/AnnotationPlugin/upenncontrast_annotation/server/api/annotation.py index e9f3b35a..920b1dd4 100644 --- a/devops/girder/plugins/AnnotationPlugin/upenncontrast_annotation/server/api/annotation.py +++ b/devops/girder/plugins/AnnotationPlugin/upenncontrast_annotation/server/api/annotation.py @@ -1,8 +1,12 @@ +import json +import orjson + from girder.api import access from girder.api.describe import Description, describeRoute, autoDescribeRoute -from girder.api.rest import Resource, loadmodel +from girder.api.rest import Resource, loadmodel, setResponseHeader from girder.constants import AccessType from girder.exceptions import AccessException, RestException +from girder.utility import JsonEncoder from ..helpers.proxiedModel import recordable, memoizeBodyJson from ..models.annotation import 
Annotation as AnnotationModel @@ -209,14 +213,41 @@ def find(self, params): query["shape"] = params["shape"] if params["tags"] is not None and len(params["tags"]) > 0: query["tags"] = {"$all": params["tags"]} - return self._annotationModel.findWithPermissions( - query, - sort=sort, - user=self.getCurrentUser(), - level=AccessType.READ, - limit=limit, - offset=offset, - ) + + def generateResult(): + cursor = self._annotationModel.findWithPermissions( + query, + sort=sort, + user=self.getCurrentUser(), + level=AccessType.READ, + limit=limit, + offset=offset, + ) + chunk = [b"["] + first = True + for annotation in cursor: + if not first: + chunk.append(b",") + # orjson and base json won't serialize ObjectIds + annotation["_id"] = str(annotation["_id"]) + # We don't need to transmit the access control for + # annotations + annotation.pop("access") + # Otherwise, we can use json + # chunk.append(json.dumps(annotation, allow_nan=False, cls=JsonEncoder, separators=(",", ":")).encode()) + # If we got rid of ObjectIds, using the json defaults is faster + # chunk.append(json.dumps(annotation).encode()) + # But orjson is faster yet + chunk.append(orjson.dumps(annotation)) + first = False + if len(chunk) > 1000: + yield b"".join(chunk) + chunk = [] + chunk.append(b"]") + yield b"".join(chunk) + + setResponseHeader("Content-Type", "application/json") + return generateResult @access.user @describeRoute(