Skip to content

Commit

Permalink
This uses the orjson library to serialize annotations.
Browse files Browse the repository at this point in the history
The orjson library is substantially faster than the built-in Python json
library at the expense of being less flexible.  Here, we strip out the
access control dictionaries from each annotation, convert the mongo id
to a string, then use orjson to serialize.  Further, rather than load
the entire set of annotations into memory and serialize them in one
shot, we stream them from the database and serialize them individually.
When there are enough of them for efficiency, we yield them to the
output function.  This allows the data to start being returned before
the database finishes, and, if the connection is interrupted, to stop
pulling from the database when it is no longer useful.

orjson is included as part of large_image, so we don't need to add it to
the dependencies.
  • Loading branch information
manthey committed Nov 25, 2024
1 parent f26f4ec commit cdb397f
Showing 1 changed file with 40 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import json
import orjson

from girder.api import access
from girder.api.describe import Description, describeRoute, autoDescribeRoute
from girder.api.rest import Resource, loadmodel
from girder.api.rest import Resource, loadmodel, setResponseHeader
from girder.constants import AccessType
from girder.exceptions import AccessException, RestException
from girder.utility import JsonEncoder
from ..helpers.proxiedModel import recordable, memoizeBodyJson
from ..models.annotation import Annotation as AnnotationModel

Expand Down Expand Up @@ -209,14 +213,41 @@ def find(self, params):
query["shape"] = params["shape"]
if params["tags"] is not None and len(params["tags"]) > 0:
query["tags"] = {"$all": params["tags"]}
return self._annotationModel.findWithPermissions(
query,
sort=sort,
user=self.getCurrentUser(),
level=AccessType.READ,
limit=limit,
offset=offset,
)

def generateResult():
    """
    Stream the matching annotations as one JSON array, yielded as bytes.

    The annotations are read from the cursor returned by
    ``findWithPermissions`` (READ access for the current user) and each
    one is serialized individually, so the full result set is never
    held in memory at once.  Serialized pieces are buffered and
    yielded in batches; the concatenation of everything yielded is a
    single JSON array (``[]`` when nothing matches).

    :returns: a generator of ``bytes`` fragments.
    """
    cursor = self._annotationModel.findWithPermissions(
        query,
        sort=sort,
        user=self.getCurrentUser(),
        level=AccessType.READ,
        limit=limit,
        offset=offset,
    )
    chunk = [b"["]
    first = True
    for annotation in cursor:
        # Every element after the first is preceded by a separator.
        if not first:
            chunk.append(b",")
        first = False
        # orjson and base json won't serialize ObjectIds
        annotation["_id"] = str(annotation["_id"])
        # We don't need to transmit the access control for annotations.
        # Use a default so that a document without an "access" key does
        # not raise KeyError part way through the streamed response.
        annotation.pop("access", None)
        # With the ObjectId converted, the stdlib json module could also
        # serialize this, but orjson is faster.
        chunk.append(orjson.dumps(annotation))
        # Flush once enough fragments (records and separators) have
        # accumulated so data starts flowing before the cursor is
        # exhausted.
        if len(chunk) > 1000:
            yield b"".join(chunk)
            chunk = []
    chunk.append(b"]")
    yield b"".join(chunk)

setResponseHeader("Content-Type", "application/json")
return generateResult

@access.user
@describeRoute(
Expand Down

0 comments on commit cdb397f

Please sign in to comment.