diff --git a/lambdas/service/app.py b/lambdas/service/app.py index f7ee8422b3..94f739b421 100644 --- a/lambdas/service/app.py +++ b/lambdas/service/app.py @@ -1238,10 +1238,17 @@ def get_project_data(project_id: Optional[str] = None) -> JSON: schema.object( additional_properties=True, organTypes=schema.array(str), - totalFileSize=int, + totalFileSize=float, fileTypeSummaries=array_of_object_spec, - totalCellCount=int, - cellCountSummaries=array_of_object_spec + totalCellCount=float, + cellCountSummaries=array_of_object_spec, + projectEstimatedCellCount=float, + donorCount=int, + fileCount=int, + labCount=int, + projectCount=int, + speciesCount=int, + specimenCount=int, ) ) } diff --git a/lambdas/service/openapi.json b/lambdas/service/openapi.json index 0354dae100..00642fceb9 100644 --- a/lambdas/service/openapi.json +++ b/lambdas/service/openapi.json @@ -3033,6 +3033,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -3698,7 +3748,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -3763,6 +3813,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -6015,6 +6066,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -6680,7 +6781,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -6745,6 +6846,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -9242,6 +9344,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -9907,7 +10059,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -9972,6 +10124,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -12224,6 +12377,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -12889,7 +13092,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -12954,6 +13157,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -15451,57 +15655,7 @@ } ] }, - "projectId": { - "oneOf": [ - { - "type": "object", - "properties": { - "is": { - "type": "array", - "items": {} - } - }, - "required": [ - "is" - ], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "contains": { - "type": "array", - "items": {}, - "minItems": 2, - "maxItems": 2 - } - } - }, - { - "type": "object", - "properties": { - "within": { - "type": "array", - "items": {}, - "minItems": 2, - "maxItems": 2 - } - } - }, - { - "type": "object", - "properties": { - "intersects": { - "type": "array", - "items": {}, - "minItems": 2, - "maxItems": 2 - } - } - } - ] - }, - "projectTitle": { + "projectEstimatedCellCount": { "oneOf": [ { "type": "object", @@ -15551,7 +15705,7 @@ } ] }, - "publicationTitle": { + "projectId": { "oneOf": [ { "type": "object", @@ -15601,7 +15755,7 @@ } ] }, - "sampleDisease": { + "projectTitle": { "oneOf": [ { "type": "object", @@ -15651,7 +15805,7 @@ } ] }, - "sampleEntityType": { + "publicationTitle": { "oneOf": [ { "type": "object", @@ -15701,7 +15855,7 @@ } ] }, - "sampleId": { + "sampleDisease": { "oneOf": [ { "type": "object", @@ -15751,7 +15905,107 @@ } ] }, - "selectedCellType": { + "sampleEntityType": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, + "sampleId": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, + "selectedCellType": { "oneOf": [ { "type": "object", @@ -16116,7 +16370,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -16181,6 +16435,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -18433,6 +18688,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -19098,7 +19403,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -19163,6 +19468,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -21660,6 +21966,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -22325,7 +22681,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -22390,6 +22746,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -24642,6 +24999,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -25307,7 +25714,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "size", @@ -25372,6 +25779,7 @@ "preservationMethod", "project", "projectDescription", + "projectEstimatedCellCount", "projectId", "projectTitle", "publicationTitle", @@ -27746,6 +28154,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -28411,7 +28869,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" } ] }, @@ -28432,8 +28890,8 @@ } }, "totalFileSize": { - "type": "integer", - "format": "int64" + "type": "number", + "format": "double" }, "fileTypeSummaries": { "type": "array", @@ -28444,8 +28902,8 @@ } }, "totalCellCount": { - "type": "integer", - "format": "int64" + "type": "number", + "format": "double" }, "cellCountSummaries": { "type": "array", @@ -28454,6 +28912,34 @@ "properties": {}, "additionalProperties": true } + }, + "projectEstimatedCellCount": { + "type": "number", + "format": "double" + }, + "donorCount": { + "type": "integer", + "format": "int64" + }, + "fileCount": { + "type": "integer", + "format": "int64" + }, + "labCount": { + "type": "integer", + "format": "int64" + }, + "projectCount": { + "type": "integer", + "format": "int64" + }, + "speciesCount": { + "type": "integer", + "format": "int64" + }, + "specimenCount": { + "type": "integer", + "format": "int64" } }, "required": [ @@ -28461,7 +28947,14 @@ "totalFileSize", "fileTypeSummaries", "totalCellCount", - "cellCountSummaries" + "cellCountSummaries", + "projectEstimatedCellCount", + "donorCount", + "fileCount", + "labCount", + "projectCount", + "speciesCount", + "specimenCount" ], "additionalProperties": true } @@ -30645,6 +31138,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -31310,7 +31853,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" } ] } @@ -33517,6 +34060,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -34182,7 +34775,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "format", @@ -36442,6 +37035,56 @@ } ] }, + "projectEstimatedCellCount": { + "oneOf": [ + { + "type": "object", + "properties": { + "is": { + "type": "array", + "items": {} + } + }, + "required": [ + "is" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "contains": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "within": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + }, + { + "type": "object", + "properties": { + "intersects": { + "type": "array", + "items": {}, + "minItems": 2, + "maxItems": 2 + } + } + } + ] + }, "projectId": { "oneOf": [ { @@ -37107,7 +37750,7 @@ } } }, - "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" + "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a facet name, a relational operator, and an\narray of facet values. The available operators are \"is\", \"within\",\n\"contains\", and \"intersects\". Multiple filters are combined using \"and\"\nlogic. An entity must match all filters to be included in the response.\nHow multiple facet values within a single filter are combined depends\non the operator.\n\nFor the \"is\" operator, multiple values are combined using \"or\"\nlogic. For example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}`\nselects entities where the file format is either \"fastq\" or\n\"fastq.gz\". For the \"within\", \"intersects\", and \"contains\"\noperators, the facet values must come in nested pairs specifying\nupper and lower bounds, and multiple pairs are combined using \"and\"\nlogic. For example, `{\"donorCount\": {\"within\": [[1,5], [5,10]]}}`\nselects entities whose donor organism count falls within both\nranges, i.e., is exactly 5.\n\nThe organismAge facet is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported facet names are: accessions, arrayExpressAccessions, assayType, biologicalSex, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, geoSeriesAccessions, insdcProjectAccessions, insdcStudyAccessions, institution, instrumentManufacturerModel, isIntermediate, laboratory, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, organismAgeUnit, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, workflow\n" }, { "name": "format", diff --git a/src/azul/plugins/metadata/hca/__init__.py b/src/azul/plugins/metadata/hca/__init__.py index 4bf2ae0aad..343e9166f1 100644 --- a/src/azul/plugins/metadata/hca/__init__.py +++ b/src/azul/plugins/metadata/hca/__init__.py @@ -156,6 +156,7 @@ def service_config(self) -> ServiceConfig: "insdcProjectAccessions": "contents.projects.insdc_project_accessions", "insdcStudyAccessions": "contents.projects.insdc_study_accessions", "accessions": "contents.projects.accessions", + "projectEstimatedCellCount": "contents.projects.estimated_cell_count", "biologicalSex": "contents.donors.biological_sex", "sampleId": "contents.samples.biomaterial_id", @@ -248,7 +249,8 @@ def service_config(self) -> ServiceConfig: "project.contributors.institution": "institutions", "project.contributors.laboratory": "laboratory", "project.project_core.project_short_name": "project_short_name", - "project.project_core.project_title": "project_title" + "project.project_core.project_title": "project_title", + "project.estimated_cell_count": "estimated_cell_count" }, "contents.specimens": { "specimen_from_organism.provenance.document_id": "document_id", diff --git a/src/azul/plugins/metadata/hca/aggregate.py b/src/azul/plugins/metadata/hca/aggregate.py index a26d0ccf86..d764e26c1d 100644 --- a/src/azul/plugins/metadata/hca/aggregate.py +++ b/src/azul/plugins/metadata/hca/aggregate.py @@ -24,6 +24,7 @@ FrequencySetAccumulator, GroupingAggregator, ListAccumulator, + MaxAccumulator, SetAccumulator, SetOfDictAccumulator, SimpleAggregator, @@ -164,6 +165,8 @@ def _get_accumulator(self, field) -> Optional[Accumulator]: 'publications', 'accessions'): return None + elif field == 'estimated_cell_count': + return MaxAccumulator() else: return super()._get_accumulator(field) diff --git a/src/azul/plugins/metadata/hca/transform.py b/src/azul/plugins/metadata/hca/transform.py index afded93a12..535356a654 100644 --- a/src/azul/plugins/metadata/hca/transform.py +++ b/src/azul/plugins/metadata/hca/transform.py @@ -602,7 +602,8 @@ def _project_types(cls) -> FieldTypes: 'insdc_study_accessions': [null_str], 'supplementary_links': [null_str], '_type': null_str, - 'accessions': cls._accession_types() + 'accessions': cls._accession_types(), + 'estimated_cell_count': null_int } def _project(self, project: api.Project) -> MutableJSON: @@ -647,7 +648,8 @@ def _project(self, project: api.Project) -> MutableJSON: 'supplementary_links': sorted(project.supplementary_links), '_type': 'project', 'accessions': sorted(map(self._accession, project.accessions), - key=itemgetter('namespace', 'accession')) + key=itemgetter('namespace', 'accession')), + 'estimated_cell_count': project.estimated_cell_count } @classmethod diff --git a/src/azul/service/avro_pfb.py b/src/azul/service/avro_pfb.py index 5cb9230e98..d498f8fdac 100644 --- a/src/azul/service/avro_pfb.py +++ b/src/azul/service/avro_pfb.py @@ -438,6 +438,7 @@ def _entity_schema_recursive(field_types: FieldTypes, # Exceptions are fields that do not become lists during aggregation exceptions = ( 'donor_count', + 'estimated_cell_count', 'submission_date', 'total_estimated_cells', 'update_date', diff --git a/src/azul/service/elasticsearch_service.py b/src/azul/service/elasticsearch_service.py index 714b424fec..fdc41bf067 100644 --- a/src/azul/service/elasticsearch_service.py +++ b/src/azul/service/elasticsearch_service.py @@ -534,8 +534,13 @@ def transform_summary(self, 'terms', field='contents.samples.effective_organ.keyword', size=config.terms_aggregation_size) + elif entity_type == 'projects': + # Add a project cell count aggregate + es_search.aggs.metric('projectEstimatedCellCount', + 'sum', + field='contents.projects.estimated_cell_count_') else: - assert entity_type == 'projects', entity_type + assert False, entity_type cardinality_aggregations = { 'samples': { diff --git a/src/azul/service/hca_response_v5.py b/src/azul/service/hca_response_v5.py index 1f78363fac..7caee58fb4 100644 --- a/src/azul/service/hca_response_v5.py +++ b/src/azul/service/hca_response_v5.py @@ -34,6 +34,7 @@ to_camel_case, ) from azul.types import ( + AnyJSON, JSON, ) @@ -83,8 +84,8 @@ class FileTypeSummary(JsonObject): # https://github.com/DataBiosphere/azul/issues/3180 source = ListProperty() # List could have string(s) and/or None count = IntegerProperty() - totalSize = IntegerProperty() - matrixCellCount = IntegerProperty() + totalSize = FloatProperty() + matrixCellCount = FloatProperty() isIntermediate = BooleanProperty() contentDescription = ListProperty() # List could have string(s) and/or None @@ -92,8 +93,8 @@ class FileTypeSummary(JsonObject): def for_bucket(cls, bucket: JSON) -> 'FileTypeSummary': self = cls() self.count = bucket['doc_count'] - self.totalSize = int(bucket['size_by_type']['value']) # Casting to integer since ES returns a double - self.matrixCellCount = int(bucket['matrix_cell_count_by_type']['value']) + self.totalSize = bucket['size_by_type']['value'] + self.matrixCellCount = bucket['matrix_cell_count_by_type']['value'] self.format = bucket['key'] # FIXME: Remove deprecated field 'fileType' # https://github.com/DataBiosphere/azul/issues/3180 @@ -162,6 +163,7 @@ class SummaryRepresentation(JsonObject): donorCount = IntegerProperty() labCount = IntegerProperty() totalCellCount = FloatProperty() + projectEstimatedCellCount = FloatProperty() organTypes = ListProperty(StringProperty(required=False)) fileTypeSummaries = ListProperty(FileTypeSummary) cellCountSummaries = ListProperty(OrganCellCountSummary) @@ -237,7 +239,7 @@ def __init__(self, aggregations): self.aggregations = aggregations def return_response(self): - def agg_value(*path: str) -> JSON: + def agg_value(*path: str) -> AnyJSON: agg = self.aggregations for name in path: agg = agg[name] @@ -257,6 +259,7 @@ def agg_values(function: Callable[[JSON], T], *path: str) -> List[T]: donorCount=agg_value('donorCount', 'value'), labCount=agg_value('labCount', 'value'), totalCellCount=agg_value('totalCellCount', 'value'), + projectEstimatedCellCount=agg_value('projectEstimatedCellCount', 'value'), organTypes=agg_values(OrganType.for_bucket, 'organTypes', 'buckets'), fileTypeSummaries=agg_values(FileTypeSummary.for_bucket, @@ -332,10 +335,11 @@ def make_projects(self, entry): for project in contents["projects"]: translated_project = { **self._make_entity(project), - "projectId": project['document_id'], - "projectTitle": project.get("project_title"), - "projectShortname": project["project_short_name"], - "laboratory": sorted(set(project.get("laboratory", [None]))) + 'projectId': project['document_id'], + 'projectTitle': project.get('project_title'), + 'projectShortname': project['project_short_name'], + 'laboratory': sorted(set(project.get('laboratory', [None]))), + 'estimatedCellCount': project['estimated_cell_count'], } if self.entity_type in ('projects', 'bundles'): entity = one(entry['contents']['aggregate_dates']) diff --git a/src/azul/service/repository_service.py b/src/azul/service/repository_service.py index 504b0d9719..4b3ca9519b 100644 --- a/src/azul/service/repository_service.py +++ b/src/azul/service/repository_service.py @@ -157,7 +157,8 @@ def get_summary(self, catalog: CatalogName, filters): ], 'projects': [ 'project', - 'labCount' + 'labCount', + 'projectEstimatedCellCount' ], 'cell_suspensions': [ 'totalCellCount', diff --git a/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T113344.698028Z.results.json b/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T113344.698028Z.results.json index 45678878ce..acbc421be3 100644 --- a/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T113344.698028Z.results.json +++ b/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T113344.698028Z.results.json @@ -273,7 +273,9 @@ "submission_date": "2018-11-02T10:02:12.133000Z", "update_date": "2018-11-02T10:07:39.499000Z", "_type": "project", - "accessions": [] + "accessions": [], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ], "matrices": [], @@ -737,7 +739,9 @@ "_type": "project", "submission_date": "2018-11-02T10:02:12.133000Z", "update_date": "2018-11-02T10:07:39.499000Z", - "accessions": [] + "accessions": [], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] } @@ -993,7 +997,9 @@ "_type": "project", "submission_date": "2018-11-02T10:02:12.133000Z", "update_date": "2018-11-02T10:07:39.499000Z", - "accessions": [] + "accessions": [], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] } @@ -1276,7 +1282,9 @@ "_type": "project", "submission_date": "2018-11-02T10:02:12.133000Z", "update_date": "2018-11-02T10:07:39.499000Z", - "accessions": [] + "accessions": [], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] } @@ -1559,7 +1567,9 @@ "submission_date": "2018-11-02T10:02:12.133000Z", "update_date": "2018-11-02T10:07:39.499000Z", "_type": "project", - "accessions" : [] + "accessions" : [], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ], "matrices": [], @@ -1899,7 +1909,9 @@ "update_date": "2018-11-02T10:07:39.499000Z", "_type": [ "project" - ] + ], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ], "matrices": [], @@ -2415,7 +2427,9 @@ "update_date": "2018-11-02T10:07:39.499000Z", "_type": [ "project" - ] + ], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] }, @@ -2727,7 +2741,9 @@ "project" ], "submission_date": "2018-11-02T10:02:12.133000Z", - "update_date": "2018-11-02T10:07:39.499000Z" + "update_date": "2018-11-02T10:07:39.499000Z", + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] }, @@ -2999,7 +3015,9 @@ "project" ], "submission_date": "2018-11-02T10:02:12.133000Z", - "update_date": "2018-11-02T10:07:39.499000Z" + "update_date": "2018-11-02T10:07:39.499000Z", + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] }, @@ -3332,7 +3350,9 @@ "_type": "project", "submission_date": "2018-11-02T10:02:12.133000Z", "update_date": "2018-11-02T10:07:39.499000Z", - "accessions": [] + "accessions": [], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ], "matrices": [], @@ -3620,7 +3640,9 @@ "_type": "project", "submission_date": "2018-11-02T10:02:12.133000Z", "update_date": "2018-11-02T10:07:39.499000Z", - "accessions": [] + "accessions": [], + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] }, @@ -3913,7 +3935,9 @@ "project" ], "submission_date": "2018-11-02T10:02:12.133000Z", - "update_date": "2018-11-02T10:07:39.499000Z" + "update_date": "2018-11-02T10:07:39.499000Z", + "estimated_cell_count": 9223372036854774784, + "estimated_cell_count_": null } ] }, diff --git a/test/indexer/test_hca_indexer.py b/test/indexer/test_hca_indexer.py index 99039ced2e..848de2fa67 100644 --- a/test/indexer/test_hca_indexer.py +++ b/test/indexer/test_hca_indexer.py @@ -1,3 +1,6 @@ +from bisect import ( + insort, +) from collections import ( Counter, defaultdict, @@ -56,6 +59,9 @@ config, hmac, ) +from azul.collections import ( + NestedDict, +) from azul.deployment import ( aws, ) @@ -1268,6 +1274,47 @@ def test_accessions_fields(self): ] self.assertEqual(expected_accessions, project['accessions']) + def test_cell_counts(self): + """ + Verify the cell counts found in project, cell_suspension, and file entities + """ + # Bundles from the canned staging area, both for project 90bf705c + # https://github.com/HumanCellAtlas/schema-test-data/ + bundle_fqid = self.bundle_fqid(uuid='4da04038-adab-59a9-b6c4-3a61242cc972', + version='2021-01-01T00:00:00.000000Z') + self._index_canned_bundle(bundle_fqid) + bundle_fqid = self.bundle_fqid(uuid='d7b8cbff-aee9-5a05-a4a1-d8f4e720aee7', + version='2021-01-01T00:00:00.000000Z') + self._index_canned_bundle(bundle_fqid) + hits = self._get_all_hits() + + field_paths = [ + ('projects', 'estimated_cell_count'), + ('cell_suspensions', 'total_estimated_cells'), + ('files', 'matrix_cell_count') + ] + actual = NestedDict(2, list) + for hit in sorted(hits, key=lambda d: d['_id']): + entity_type, aggregate = self._parse_index_name(hit) + contents = hit['_source']['contents'] + for inner_entity_type, field_name in field_paths: + for inner_entity in contents[inner_entity_type]: + value = inner_entity[field_name] + insort(actual[aggregate][entity_type][inner_entity_type], value) + + expected = NestedDict(1, dict) + for aggregate in False, True: + for entity_type in self.index_service.entity_types(self.catalog): + is_project_aggregate = aggregate and entity_type == 'projects' + expected[aggregate][entity_type] = { + # estimated_cell_count is aggregated using max, not sum + 'projects': [10000] if is_project_aggregate else [10000, 10000], + 'cell_suspensions': [40000] if is_project_aggregate else [20000, 20000], + 'files': [17100] if is_project_aggregate else [2100, 15000] + } + + self.assertEqual(expected.to_dict(), actual.to_dict()) + def test_no_cell_count_contributions(self): def assert_cell_suspension(expected: JSON, hits: List[JSON]): project_hit = one(hit diff --git a/test/service/data/pfb_manifest.results.json b/test/service/data/pfb_manifest.results.json index 9451d1146b..228a66baf0 100644 --- a/test/service/data/pfb_manifest.results.json +++ b/test/service/data/pfb_manifest.results.json @@ -494,6 +494,7 @@ "document_id": [ "6615efae-fca8-4dd2-a223-9cfcf30fe94d" ], + "estimated_cell_count": "", "geo_series_accessions": [ "" ], @@ -815,6 +816,7 @@ "document_id": [ "e8642221-4c2c-4fd7-b926-a68bce363c88" ], + "estimated_cell_count": "", "geo_series_accessions": [ "" ], @@ -5103,6 +5105,7 @@ "document_id": [ "90bf705c-d891-5ce2-aa54-094488b445c6" ], + "estimated_cell_count": 10000, "geo_series_accessions": [ "" ], diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index f617423aff..3110ef24a7 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -370,6 +370,8 @@ def test_compact_manifest(self): 'Melanoma infiltration of stromal and immune cells', 'Melanoma infiltration of stromal and immune cells'), + ('project.estimated_cell_count', '', ''), + ('specimen_from_organism.provenance.document_id', '', 'aaaaaaaa-7bab-44ba-a81d-3d8cb3873244 || b4e55fe1-7bab-44ba-a81d-3d8cb3873244'), @@ -649,6 +651,7 @@ def test_terra_bdbag_manifest(self): 'project__contributors__laboratory': '', 'project__project_core__project_short_name': 'integration/Smart-seq2/2018-10-10T02:23:36Z', 'project__project_core__project_title': 'Q4_DEMO-Single cell RNA-seq of primary human glioblastomas', + 'project__estimated_cell_count': '', 'specimen_from_organism__provenance__document_id': 'b5894cf5-ecdc-4ea6-a0b9-5335ab678c7a', 'specimen_from_organism__diseases': 'glioblastoma', 'specimen_from_organism__organ': 'brain', @@ -746,6 +749,7 @@ def test_terra_bdbag_manifest(self): 'project__contributors__laboratory': 'John Dear', 'project__project_core__project_short_name': 'Single of human pancreas', 'project__project_core__project_title': 'Single cell transcriptome patterns.', + 'project__estimated_cell_count': '', 'specimen_from_organism__provenance__document_id': 'a21dc760-a500-4236-bcff-da34a0e873d2', 'specimen_from_organism__diseases': 'normal', 'specimen_from_organism__organ': 'pancreas', @@ -860,6 +864,7 @@ def sort_rows(rows: List[Dict[str, str]]) -> List[List[Tuple[str, str]]]: 'project__contributors__laboratory', 'project__project_core__project_short_name', 'project__project_core__project_title', + 'project__estimated_cell_count', 'specimen_from_organism__provenance__document_id', 'specimen_from_organism__diseases', 'specimen_from_organism__organ', diff --git a/test/service/test_repository_projects.py b/test/service/test_repository_projects.py index c596f9d2a9..eda696d300 100644 --- a/test/service/test_repository_projects.py +++ b/test/service/test_repository_projects.py @@ -88,7 +88,8 @@ def assert_file_type_summaries(hit): 'contributorMatrices', 'submissionDate', 'updateDate', - 'accessions' + 'accessions', + 'estimatedCellCount' } response_json = get_response_json() self.assertIn('hits', response_json) diff --git a/test/service/test_response.py b/test/service/test_response.py index 6c3ba0a247..c40e55bf16 100644 --- a/test/service/test_response.py +++ b/test/service/test_response.py @@ -210,6 +210,7 @@ def test_key_search_files_response(self): "projectTitle": ["Single cell transcriptome patterns."], "submissionDate": "2018-11-02T10:02:12.133000Z", "updateDate": "2018-11-02T10:07:39.499000Z", + "estimatedCellCount": None, } ], "protocols": [ @@ -315,7 +316,7 @@ def test_key_search_samples_response(self): "isIntermediate": None, "source": [None], "fileSource": [None], - "totalSize": 385472253 + "totalSize": 385472253.0 } ], "organoids": [ @@ -328,6 +329,7 @@ def test_key_search_samples_response(self): "projectTitle": ["Single cell transcriptome patterns."], "submissionDate": "2018-11-02T10:02:12.133000Z", "updateDate": "2018-11-02T10:07:39.499000Z", + "estimatedCellCount": None, } ], "protocols": [ @@ -479,6 +481,7 @@ def test_file_search_response(self): "projectTitle": ["Single cell transcriptome patterns."], "submissionDate": "2018-11-02T10:02:12.133000Z", "updateDate": "2018-11-02T10:07:39.499000Z", + "estimatedCellCount": None, } ], "protocols": [ @@ -772,7 +775,7 @@ def test_projects_key_search_response(self): "isIntermediate": None, "source": [None], "fileSource": [None], - "totalSize": 385472253 + "totalSize": 385472253.0 } ], "organoids": [ @@ -856,6 +859,7 @@ def test_projects_key_search_response(self): ], "submissionDate": "2018-11-02T10:02:12.133000Z", "updateDate": "2018-11-02T10:07:39.499000Z", + "estimatedCellCount": None, "matrices": {}, "contributorMatrices": {}, "accessions": [], @@ -970,7 +974,7 @@ def test_projects_file_search_response(self): "isIntermediate": None, "source": [None], "fileSource": [None], - "totalSize": 385472253 + "totalSize": 385472253.0 } ], "organoids": [ @@ -1052,6 +1056,7 @@ def test_projects_file_search_response(self): "supplementaryLinks": [ 'https://www.ebi.ac.uk/gxa/sc/experiments/E-GEOD-81547/Results' ], + "estimatedCellCount": None, "matrices": {}, "contributorMatrices": {}, "submissionDate": "2018-11-02T10:02:12.133000Z", @@ -1206,7 +1211,7 @@ def test_project_accessions_response(self): "isIntermediate": None, "source": ['DCP/2 Analysis'], "fileSource": ['DCP/2 Analysis'], - "totalSize": 2395616 + "totalSize": 2395616.0 }, { "contentDescription": [None], @@ -1329,6 +1334,7 @@ def test_project_accessions_response(self): } ], "supplementaryLinks": [None], + "estimatedCellCount": None, "matrices": {}, "contributorMatrices": {}, "submissionDate": "2019-02-14T18:29:42.531000Z", @@ -2138,7 +2144,7 @@ def test_grouping(self): 'fileType': 'fastq.gz', 'format': 'fastq.gz', 'count': 117, - 'totalSize': 1670420872710, + 'totalSize': 1670420872710.0, 'matrixCellCount': None, 'isIntermediate': None, 'contentDescription': ['DNA sequence'], @@ -2149,7 +2155,7 @@ def test_grouping(self): 'fileType': 'fastq.gz', 'format': 'fastq.gz', 'count': 3, - 'totalSize': 128307505318, + 'totalSize': 128307505318.0, 'matrixCellCount': None, 'isIntermediate': None, 'contentDescription': ['Cellular Genetics'], @@ -2160,7 +2166,7 @@ def test_grouping(self): 'fileType': 'loom', 'format': 'loom', 'count': 40, - 'totalSize': 59207580244, + 'totalSize': 59207580244.0, 'matrixCellCount': None, 'isIntermediate': True, 'contentDescription': ['Count Matrix'], @@ -2171,7 +2177,7 @@ def test_grouping(self): 'fileType': 'loom', 'format': 'loom', 'count': 1, - 'totalSize': 5389602923, + 'totalSize': 5389602923.0, 'matrixCellCount': None, 'isIntermediate': False, 'contentDescription': ['Count Matrix'], @@ -2182,7 +2188,7 @@ def test_grouping(self): 'fileType': 'bam', 'format': 'bam', 'count': 40, - 'totalSize': 1659270110045, + 'totalSize': 1659270110045.0, 'matrixCellCount': None, 'isIntermediate': None, 'contentDescription': [None], @@ -2334,6 +2340,75 @@ def test_inner_entity_samples(self): self.assertEqual(expected_hits, [hit['samples'] for hit in hits]) +@patch_dss_endpoint +@patch_source_cache +class TestSchemaTestDataCannedBundle(WebServiceTestCase): + maxDiff = None + + @classmethod + def bundles(cls) -> List[BundleFQID]: + return [ + # Bundles from the canned staging area, both for project 90bf705c + # https://github.com/HumanCellAtlas/schema-test-data/ + cls.bundle_fqid(uuid='4da04038-adab-59a9-b6c4-3a61242cc972', + version='2021-01-01T00:00:00.000000Z'), + cls.bundle_fqid(uuid='d7b8cbff-aee9-5a05-a4a1-d8f4e720aee7', + version='2021-01-01T00:00:00.000000Z'), + ] + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls._setup_indices() + + @classmethod + def tearDownClass(cls): + cls._teardown_indices() + super().tearDownClass() + + def test_project_cell_count(self): + """ + Verify the project 'estimatedCellCount' value across the various endpoints + """ + expected_cell_counts = { + 'files': [10000, 10000], + 'samples': [10000, 10000], + 'projects': [10000], + 'bundles': [10000, 10000], + } + params = {'catalog': self.catalog} + for entity_type in expected_cell_counts.keys(): + with self.subTest(entity_type=entity_type): + url = self.base_url.set(path=('index', entity_type), args=params) + response = requests.get(url) + response.raise_for_status() + response_json = response.json() + actual_cell_counts = [] + for hit in response_json['hits']: + project = one(hit['projects']) + actual_cell_counts.append(project['estimatedCellCount']) + self.assertEqual(expected_cell_counts[entity_type], + actual_cell_counts) + + def test_summary_cell_counts(self): + url = self.base_url.set(path='/index/summary', + args=dict(catalog=self.catalog)) + response = requests.get(str(url)) + response.raise_for_status() + summary = response.json() + self.assertEqual(summary['projectCount'], 1) + self.assertEqual(summary['fileCount'], 1 + 1) + self.assertEqual(summary['projectEstimatedCellCount'], 10000.0) + self.assertEqual(summary['totalCellCount'], 20000.0 + 20000.0) # cell suspensions + self.assertEqual(summary['cellCountSummaries'], [ + { + 'organType': ['blood'], + 'countOfDocsWithOrganType': 2, + 'totalCellCountByOrgan': 20000.0 + 20000.0 + } + ]) + + @patch_dss_endpoint @patch_source_cache class TestSortAndFilterByCellCount(WebServiceTestCase): @@ -2925,6 +3000,7 @@ def test_summary_response(self): response = requests.get(str(url)) response.raise_for_status() summary_object = response.json() + self.assertEqual(summary_object['projectEstimatedCellCount'], 0.0) self.assertEqual(summary_object['fileCount'], 2 + 19 + 227) self.assertEqual(summary_object['labCount'], 1 + 1 + 1) self.assertEqual(summary_object['donorCount'], 1 + 4 + 1)