From abaab7e1ab741f7353db4da80e107cd3ec51b760 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Tue, 12 Mar 2024 16:42:52 -0400 Subject: [PATCH 01/16] initial work to get dir schema minor versions --- src/ingest_validation_tools/schema_loader.py | 16 ++++++ .../validation_utils.py | 55 +++++++++++-------- 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/src/ingest_validation_tools/schema_loader.py b/src/ingest_validation_tools/schema_loader.py index 62d6c98ff..715e1cf4d 100644 --- a/src/ingest_validation_tools/schema_loader.py +++ b/src/ingest_validation_tools/schema_loader.py @@ -260,6 +260,22 @@ def get_directory_schema( return schema +def get_possible_directory_schemas(dir_schema) -> Optional[List]: + schemas = [] + # TODO: check formatting of minor versions and tailor better; + # deal with major version only + directory_schema_minor_versions = sorted( + _directory_schemas_path.glob(dir_schema), reverse=True + ) + if not directory_schema_minor_versions: + return None + for directory_schema_path in directory_schema_minor_versions: + schema = load_yaml(directory_schema_path) + schema["files"] += [] + schemas.append(schema) + return schemas + + def _validate_field(field: dict) -> None: if field["name"].endswith("_unit") and "enum" not in field["constraints"]: raise Exception('"_unit" fields must have enum constraints', field) diff --git a/src/ingest_validation_tools/validation_utils.py b/src/ingest_validation_tools/validation_utils.py index 9fc67529d..506ec4b4b 100644 --- a/src/ingest_validation_tools/validation_utils.py +++ b/src/ingest_validation_tools/validation_utils.py @@ -14,7 +14,7 @@ from ingest_validation_tools.schema_loader import ( PreflightError, SchemaVersion, - get_directory_schema, + get_possible_directory_schemas, ) from ingest_validation_tools.table_validator import ReportType @@ -160,34 +160,41 @@ def get_data_dir_errors( """ Validate a single data_path. 
""" - schema = get_directory_schema(dir_schema=dir_schema) + possible_schemas = get_possible_directory_schemas(dir_schema) - if schema is None: + if possible_schemas is None: return {"Undefined directory schema": dir_schema} - schema_warning_fields = [field for field in schema if field in ["deprecated", "draft"]] - schema_warning = ( - {f"{schema_warning_fields[0].title()} directory schema": dir_schema} - if schema_warning_fields - else None - ) + # Collect errors, discard if schema validates against a minor version + errors = defaultdict(list) - try: - validate_directory(data_path, schema["files"], dataset_ignore_globs=dataset_ignore_globs) - except DirectoryValidationErrors as e: - # If there are DirectoryValidationErrors and the schema is deprecated/draft... - # schema deprecation/draft status is more important. + for schema in possible_schemas: + schema_warning_fields = [field for field in schema if field in ["deprecated", "draft"]] + schema_warning = ( + f"{schema_warning_fields[0].title()} directory schema: {dir_schema}" + if schema_warning_fields + else None + ) + + try: + validate_directory( + data_path, schema["files"], dataset_ignore_globs=dataset_ignore_globs + ) + except DirectoryValidationErrors as e: + # If there are DirectoryValidationErrors and the schema is deprecated/draft... + # schema deprecation/draft status is more important. + errors[f"{data_path} (as {dir_schema})"].extend(e.errors) + if schema_warning: + errors[f"{data_path} (as {dir_schema})"].append(schema_warning) + continue + except OSError as e: + # If there are OSErrors and the schema is deprecated/draft... + # the OSErrors are more important. + errors[f"{data_path} (as {dir_schema})"].append(f"{e.strerror}: {e.filename}") if schema_warning: - return schema_warning - errors = {} - errors[f"{data_path} (as {dir_schema})"] = e.errors - return errors - except OSError as e: - # If there are OSErrors and the schema is deprecated/draft... - # the OSErrors are more important. 
- return {f"{data_path} (as {dir_schema})": {e.strerror: e.filename}} - if schema_warning: - return schema_warning + errors[f"{data_path} (as {dir_schema})"].append(schema_warning) + if errors: + return dict(errors) # No problems! return None From d66b522b22a76b8a00152b36bca054993c68ae4e Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Fri, 15 Mar 2024 15:39:07 -0400 Subject: [PATCH 02/16] ran tests, updated dir schema location logic --- CHANGELOG.md | 1 + .../bad-cedar-dir-histology/README.md | 6 +++--- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../README.md | 6 +++--- .../dataset-examples/bad-codex-data/README.md | 4 ++-- .../bad-scatacseq-data/README.md | 4 ++-- .../bad-tsv-formats/README.md | 4 ++-- .../fixtures.json | 2 +- .../expected-failure/fixtures.json | 2 +- src/ingest_validation_tools/schema_loader.py | 2 +- .../validation_utils.py | 2 +- tests-manual/update_test_data.py | 19 +++++++++++++++++-- tests/test_dataset_examples.py | 12 ++++++++++++ 18 files changed, 52 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 457b77246..7515fcff6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ - Update Lightsheet directory schema - Update Histology to include description on OME-TIFFs - Update Histology with links +- Accommodate dir schema minor versions ## v0.0.17 diff --git a/examples/dataset-examples/bad-cedar-dir-histology/README.md b/examples/dataset-examples/bad-cedar-dir-histology/README.md index 1dd13c01d..4baa2f8b9 100644 --- a/examples/dataset-examples/bad-cedar-dir-histology/README.md +++ b/examples/dataset-examples/bad-cedar-dir-histology/README.md @@ -4,10 +4,10 @@ Upload Errors: ? 
examples/dataset-examples/bad-cedar-dir-histology/upload/bad-histology-metadata.tsv, column 'data_path', value './dataset-1' : examples/dataset-examples/bad-cedar-dir-histology/upload/dataset-1 (as histology-v2): - No such file or directory: examples/dataset-examples/bad-cedar-dir-histology/upload/dataset-1 + - 'No such file or directory: examples/dataset-examples/bad-cedar-dir-histology/upload/dataset-1' examples/dataset-examples/bad-cedar-dir-histology/upload/bad-histology-metadata.tsv, column 'data_path', value './wrong': examples/dataset-examples/bad-cedar-dir-histology/upload/wrong (as histology-v2): - Not allowed: + - Not allowed: - not-allowed. Required but missing: - extras\/.*. @@ -26,4 +26,4 @@ Reference Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json index f7e3a7e21..8fcbaaad5 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json @@ -1 +1 @@ -{"assaytype": {"RNAseq": {"assaytype": "scRNAseq-10Genomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}, "Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", 
"dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {"scRNAseq-10Genomics-v3": {"examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/upload/bad-visium-rnaseq-metadata.tsv": [{"URL Errors": ["Row 3, field 'parent_sample_id' with value '': 404 Client Error: Not Found for url: https://entity.api.hubmapconsortium.org/entities/"], "Validation Errors": ["On row 1, column \"parent_sample_id\", value \"\" fails because of error \"missingRequired\"", "On row 2, column \"preparation_protocol_doi\", value \"wrong\" fails because of error \"invalidUrl\""]}]}, "contributors": null, "visium-no-probes": null, "h-and-e": null}} \ No newline at end of file +{"assaytype": {"RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}, "Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {"scRNAseq-10xGenomics-v3": {"examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/upload/bad-visium-rnaseq-metadata.tsv": [{"URL Errors": ["Row 3, field 'parent_sample_id' with value '': 404 Client Error: Not Found for url: https://entity.api.hubmapconsortium.org/entities/"], "Validation Errors": ["On row 1, column \"parent_sample_id\", value \"\" fails because of error \"missingRequired\"", "On row 2, column \"preparation_protocol_doi\", value \"wrong\" fails because of error \"invalidUrl\""]}]}, "contributors": null, 
"visium-no-probes": null, "h-and-e": null}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json index c56029912..4c99b98f8 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10Genomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10Genomics-v3": null}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x 
Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10xGenomics-v3": null}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-missing-child/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-missing-child/fixtures.json index 78f6d8876..be91292c0 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-missing-child/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-missing-child/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json index 788b23491..b22ffa40f 100644 --- 
a/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10Genomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json index 788b23491..b22ffa40f 100644 --- 
a/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10Genomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-wrong-child/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-wrong-child/fixtures.json index d3b154fe8..47773ae03 100644 --- 
a/examples/dataset-examples/bad-cedar-multi-assay-visium-wrong-child/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-wrong-child/fixtures.json @@ -1 +1 @@ -{"assaytype": {"RNAseq (with probes)": {"assaytype": "scRNAseq-visium-with-probes", "contains-pii": true, "dataset-type": "RNAseq (with probes)", "description": "Visium RNAseq with probes", "dir-schema": "rnaseq-with-probes-v2", "primary": true, "vitessce-hints": []}, "Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"RNAseq (with probes)": {"assaytype": "scRNAseq-visium-with-probes", "contains-pii": true, "dataset-type": "RNAseq (with probes)", "description": "Visium RNAseq with probes", "dir-schema": "rnaseq-with-probes-v2", "primary": true, "vitessce-hints": []}, "Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md 
index 956d83566..b49faaddc 100644 --- a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md +++ b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md @@ -5,9 +5,9 @@ Upload Errors: column 'data_path', value 'dataset-1' : ? examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/upload/dataset-1 (as codex-v1-with-dataset-json) - : Required but missing: - - (raw|src_[^/]*)/dataset\.json. + : - Required but missing: + - (raw|src_[^/]*)/dataset\.json. Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/bad-codex-data/README.md b/examples/dataset-examples/bad-codex-data/README.md index 64d953339..e00df1c20 100644 --- a/examples/dataset-examples/bad-codex-data/README.md +++ b/examples/dataset-examples/bad-codex-data/README.md @@ -22,7 +22,7 @@ Upload Errors: Directory Errors: examples/dataset-examples/bad-codex-data/upload/codex-metadata.tsv, column 'data_path', value 'dataset-1': examples/dataset-examples/bad-codex-data/upload/dataset-1 (as codex-v1-with-dataset-json): - Not allowed: + - Not allowed: - channelnames.txt. - cyc002_reg001_200216_112537/bad. - experiment.json. @@ -48,4 +48,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' 
-``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/bad-scatacseq-data/README.md b/examples/dataset-examples/bad-scatacseq-data/README.md index a1056d7a2..f95ad1842 100644 --- a/examples/dataset-examples/bad-scatacseq-data/README.md +++ b/examples/dataset-examples/bad-scatacseq-data/README.md @@ -6,7 +6,7 @@ Upload Errors: Directory Errors: examples/dataset-examples/bad-scatacseq-data/upload/scatacseq-metadata.tsv, column 'data_path', value 'dataset-1': examples/dataset-examples/bad-scatacseq-data/upload/dataset-1 (as scatacseq-v0): - Not allowed: + - Not allowed: - not-the-file-you-are-looking-for.txt. - unexpected-directory/place-holder.txt. Required but missing: @@ -23,4 +23,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/bad-tsv-formats/README.md b/examples/dataset-examples/bad-tsv-formats/README.md index 2a6c9b10b..7853b3307 100644 --- a/examples/dataset-examples/bad-tsv-formats/README.md +++ b/examples/dataset-examples/bad-tsv-formats/README.md @@ -8,7 +8,7 @@ Upload Errors: Directory Errors: examples/dataset-examples/bad-tsv-formats/upload/codex-metadata.tsv, column 'data_path', value 'dataset-1/': examples/dataset-examples/bad-tsv-formats/upload/dataset-1 (as codex-v1-with-dataset-json): - Not allowed: + - Not allowed: - channelnames.txt. - cyc002_reg001_200216_112537/1_00001_Z001_CH1.tif. - experiment.json. @@ -75,4 +75,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' 
-``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json b/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json index c56029912..4c99b98f8 100644 --- a/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json +++ b/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10Genomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10Genomics-v3": null}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": 
{"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10xGenomics-v3": null}} \ No newline at end of file diff --git a/examples/plugin-tests/expected-failure/fixtures.json b/examples/plugin-tests/expected-failure/fixtures.json index c56029912..4c99b98f8 100644 --- a/examples/plugin-tests/expected-failure/fixtures.json +++ b/examples/plugin-tests/expected-failure/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (No probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10Genomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10Genomics-v3": null}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": 
[]}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10xGenomics-v3": null}} \ No newline at end of file diff --git a/src/ingest_validation_tools/schema_loader.py b/src/ingest_validation_tools/schema_loader.py index 715e1cf4d..360d79189 100644 --- a/src/ingest_validation_tools/schema_loader.py +++ b/src/ingest_validation_tools/schema_loader.py @@ -265,7 +265,7 @@ def get_possible_directory_schemas(dir_schema) -> Optional[List]: # TODO: check formatting of minor versions and tailor better; # deal with major version only directory_schema_minor_versions = sorted( - _directory_schemas_path.glob(dir_schema), reverse=True + _directory_schemas_path.glob(f"{dir_schema}*.yaml"), reverse=True ) if not directory_schema_minor_versions: return None diff --git a/src/ingest_validation_tools/validation_utils.py b/src/ingest_validation_tools/validation_utils.py index 506ec4b4b..0a2d31261 100644 --- a/src/ingest_validation_tools/validation_utils.py +++ b/src/ingest_validation_tools/validation_utils.py @@ -183,7 +183,7 @@ def get_data_dir_errors( except DirectoryValidationErrors as e: # If there are DirectoryValidationErrors and the schema is deprecated/draft... # schema deprecation/draft status is more important. 
- errors[f"{data_path} (as {dir_schema})"].extend(e.errors) + errors[f"{data_path} (as {dir_schema})"].append(e.errors) if schema_warning: errors[f"{data_path} (as {dir_schema})"].append(schema_warning) continue diff --git a/tests-manual/update_test_data.py b/tests-manual/update_test_data.py index 6ab560c54..c65a14d22 100644 --- a/tests-manual/update_test_data.py +++ b/tests-manual/update_test_data.py @@ -34,6 +34,7 @@ def __init__( opts: Dict = {}, verbose: bool = False, dry_run: bool = True, + full_diff: bool = False, ): self.dir = dir self.globus_token = globus_token @@ -42,6 +43,7 @@ def __init__( self.verbose = verbose self.upload_verbose = True if "plugin-tests" in dir else False self.dry_run = dry_run + self.full_diff = full_diff def log(self, verbose_message, short_message: Optional[str] = None): if self.verbose: @@ -92,7 +94,13 @@ def update_test_data(self) -> Dict[str, List]: if "README" not in self.exclude: readme = self.open_or_create_readme() try: - diff_test(self.dir, readme, clean_report(report), verbose=self.verbose) + diff_test( + self.dir, + readme, + clean_report(report), + verbose=self.verbose, + full_diff=self.full_diff, + ) readme.close() print(f"No diff found, skipping {self.dir}/README.md") except MockException: @@ -109,6 +117,7 @@ def update_test_data(self) -> Dict[str, List]: ) self.change_report[self.dir].append("README diff found") else: + breakpoint() self.log( f""" Writing the following report to {self.dir}/README.md: @@ -117,7 +126,6 @@ def update_test_data(self) -> Dict[str, List]: f"Updating {self.dir}/README.md.", ) with open(f"{self.dir}/README.md", "w") as f: - # TODO: potential issues with blank lines at end of report f.write(clean_report(report)) dataset_test(self.dir, self.opts) else: @@ -227,6 +235,7 @@ def call_update(dir: str, args) -> Dict: dry_run=args.dry_run, verbose=args.verbose, exclude=args.exclude, + full_diff=args.full_diff, ).update_test_data() return change_report @@ -277,6 +286,12 @@ def call_update(dir: str, 
args) -> Dict: action="store_true", help="Default is False. Used for investigating testing failures with more verbose output. Requires passing a test_dir. Pass a blank Globus token as this runs offline.", ) +parser.add_argument( + "-f", + "--full_diff", + action="store_true", + help="Default is False. Show full and cleaned README diff.", +) args = parser.parse_args() # tsv-examples not currently integrated, could be if needed. diff --git a/tests/test_dataset_examples.py b/tests/test_dataset_examples.py index 9cd201091..137202796 100644 --- a/tests/test_dataset_examples.py +++ b/tests/test_dataset_examples.py @@ -67,7 +67,9 @@ def diff_test( readme: TextIOWrapper, report: str, verbose: bool = True, + full_diff: bool = False, ): + breakpoint() d = difflib.Differ() diff = list(d.compare(readme.readlines(), report.splitlines(keepends=True))) readme.close() @@ -77,6 +79,16 @@ def diff_test( ] new = "".join([line.strip() for line in cleaned_diff if line.startswith("+ ")]) removed = "".join([line.strip() for line in cleaned_diff if line.startswith("- ")]) + if full_diff: + print( + f""" + FULL: + {diff} + + CLEANED: + {cleaned_diff} + """ + ) if verbose: msg = f""" DIFF ADDED LINES: From 2f783477775906c5ed2cd50cf062de678c991ef4 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 18 Mar 2024 16:17:54 -0400 Subject: [PATCH 03/16] small updates to formatting, bugfixes --- .../README.md | 4 ++-- .../dataset-examples/bad-missing-data/README.md | 4 ++-- .../dataset-examples/bad-scrnaseq-v0/README.md | 4 ++-- .../good-cedar-multi-assay-visium/README.md | 4 ++-- .../dataset-iec-examples/bad-example/README.md | 4 ++-- tests-manual/update_test_data.py | 14 +++++++------- tests/test_dataset_examples.py | 3 +-- 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md index 6aca417ff..9eea1215f 100644 --- 
a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md @@ -4,7 +4,7 @@ Upload Errors: ? examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/upload/good-visium-assay-metadata.tsv, column 'data_path', value './Visium_9OLC_A4_S1' : examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/upload/Visium_9OLC_A4_S1 (as visium-no-probes-v2): - Required but missing: + - Required but missing: - lab_processed\/.*. - lab_processed\/images\/.*. - lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv. @@ -12,4 +12,4 @@ Upload Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/bad-missing-data/README.md b/examples/dataset-examples/bad-missing-data/README.md index 13052dba9..6b747cf3b 100644 --- a/examples/dataset-examples/bad-missing-data/README.md +++ b/examples/dataset-examples/bad-missing-data/README.md @@ -10,7 +10,7 @@ Upload Errors: Directory Errors: examples/dataset-examples/bad-missing-data/upload/codex-metadata.tsv, column 'data_path', value 'dataset-1': examples/dataset-examples/bad-missing-data/upload/dataset-1 (as codex-v1-with-dataset-json): - No such file or directory: examples/dataset-examples/bad-missing-data/upload/dataset-1 + - 'No such file or directory: examples/dataset-examples/bad-missing-data/upload/dataset-1' Metadata TSV Validation Errors: Local Validation Errors: examples/dataset-examples/bad-missing-data/upload/codex-metadata.tsv (as codex-v0): @@ -22,4 +22,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' 
-``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/bad-scrnaseq-v0/README.md b/examples/dataset-examples/bad-scrnaseq-v0/README.md index 16a976870..6db64a925 100644 --- a/examples/dataset-examples/bad-scrnaseq-v0/README.md +++ b/examples/dataset-examples/bad-scrnaseq-v0/README.md @@ -3,7 +3,7 @@ Upload Errors: Directory Errors: examples/dataset-examples/bad-scrnaseq-v0/upload/metadata.tsv, column 'data_path', value 'data': examples/dataset-examples/bad-scrnaseq-v0/upload/data (as scrnaseq-v0): - No such file or directory: examples/dataset-examples/bad-scrnaseq-v0/upload/data. + - 'No such file or directory: examples/dataset-examples/bad-scrnaseq-v0/upload/data.' Metadata TSV Validation Errors: Local Validation Errors: examples/dataset-examples/bad-scrnaseq-v0/upload/metadata.tsv (as scrnaseq-v0): @@ -16,4 +16,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' 
-``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md index 99e56e681..56dc0e751 100644 --- a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md +++ b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md @@ -13,8 +13,8 @@ TSVs: Metadata schema version: '2' Directory schema versions: histology-v2 good-visium-rnaseq-metadata.tsv: - Schema: scRNAseq-10Genomics-v3-v2 + Schema: scRNAseq-10xGenomics-v3-v2 Metadata schema version: '2' Directory schema versions: rnaseq-v2 -``` +``` \ No newline at end of file diff --git a/examples/dataset-iec-examples/bad-example/README.md b/examples/dataset-iec-examples/bad-example/README.md index 08dfb9a0f..cfe76d94e 100644 --- a/examples/dataset-iec-examples/bad-example/README.md +++ b/examples/dataset-iec-examples/bad-example/README.md @@ -6,7 +6,7 @@ Upload Errors: Directory Errors: examples/dataset-iec-examples/bad-example/upload/metadata.tsv, column 'data_path', value '.': examples/dataset-iec-examples/bad-example/upload (as scatacseq-v0): - Not allowed: + - Not allowed: - should-not-be-here.txt. Metadata TSV Validation Errors: Local Validation Errors: @@ -22,4 +22,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' 
-``` +``` \ No newline at end of file diff --git a/tests-manual/update_test_data.py b/tests-manual/update_test_data.py index c65a14d22..4eba0e345 100644 --- a/tests-manual/update_test_data.py +++ b/tests-manual/update_test_data.py @@ -63,7 +63,7 @@ def update_test_data(self) -> Dict[str, List]: info = self.upload.get_info() errors = self.upload.get_errors() report = ErrorReport(info=info, errors=errors) - if "Too many requests" in report.as_md(): + if "Too Many Requests" in report.as_md(): raise Exception( f"Something went wrong with Spreadsheet Validator request for {self.dir}." ) @@ -91,13 +91,14 @@ def update_test_data(self) -> Dict[str, List]: json.dump(new_data, f) else: print(f"{self.dir}/fixtures.json excluded, not changed.") + cleaned_report = clean_report(report) if "README" not in self.exclude: readme = self.open_or_create_readme() try: diff_test( self.dir, readme, - clean_report(report), + cleaned_report, verbose=self.verbose, full_diff=self.full_diff, ) @@ -111,23 +112,22 @@ def update_test_data(self) -> Dict[str, List]: self.log( f""" Would have written the following report to {self.dir}/README.md: - {clean_report(report)} + {cleaned_report} """, f"Would have updated {self.dir}/README.md.", ) self.change_report[self.dir].append("README diff found") else: - breakpoint() self.log( f""" Writing the following report to {self.dir}/README.md: - {clean_report(report)} + {cleaned_report} """, f"Updating {self.dir}/README.md.", ) with open(f"{self.dir}/README.md", "w") as f: - f.write(clean_report(report)) - dataset_test(self.dir, self.opts) + f.write(cleaned_report) + dataset_test(self.dir, self.opts | {"globus_token": self.globus_token}) else: print(f"{self.dir}/README.md excluded, not changed.") return self.change_report diff --git a/tests/test_dataset_examples.py b/tests/test_dataset_examples.py index 137202796..55f5f090b 100644 --- a/tests/test_dataset_examples.py +++ b/tests/test_dataset_examples.py @@ -52,7 +52,7 @@ def dataset_test(test_dir: str, 
dataset_opts: Dict, verbose: bool = False): def clean_report(report: ErrorReport): clean_report = [] regex = re.compile(r"((Time|Git version): )(.*)") - for line in report.as_md(): + for line in report.as_md().splitlines(keepends=True): match = regex.search(line) if match: new_line = line.replace(match.group(3), "WILL_CHANGE") @@ -69,7 +69,6 @@ def diff_test( verbose: bool = True, full_diff: bool = False, ): - breakpoint() d = difflib.Differ() diff = list(d.compare(readme.readlines(), report.splitlines(keepends=True))) readme.close() From b4cde99924b9c6b918557b46c7255a2b5051e194 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 18 Mar 2024 16:31:22 -0400 Subject: [PATCH 04/16] clarifying comment --- src/ingest_validation_tools/schema_loader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ingest_validation_tools/schema_loader.py b/src/ingest_validation_tools/schema_loader.py index 360d79189..4c7d634c1 100644 --- a/src/ingest_validation_tools/schema_loader.py +++ b/src/ingest_validation_tools/schema_loader.py @@ -260,10 +260,11 @@ def get_directory_schema( return schema -def get_possible_directory_schemas(dir_schema) -> Optional[List]: +def get_possible_directory_schemas(dir_schema: str) -> Optional[List]: schemas = [] # TODO: check formatting of minor versions and tailor better; - # deal with major version only + # deal with whole number versions (e.g. if there is a v2, v2-1, and v2-2, + # is v2 properly v2.0 or is it the most current?) 
directory_schema_minor_versions = sorted( _directory_schemas_path.glob(f"{dir_schema}*.yaml"), reverse=True ) From 3218525651c88c26b7d1060988b8ff40c4e9a612 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 21 Mar 2024 14:44:29 -0400 Subject: [PATCH 05/16] save minor dir schema and report via get_info --- src/ingest_validation_tools/schema_loader.py | 10 ++--- src/ingest_validation_tools/upload.py | 9 ++-- .../validation_utils.py | 29 +++++++------ tests-manual/update_test_data.py | 2 +- tests/test_dataset_examples.py | 43 ++++++++++++++++++- 5 files changed, 70 insertions(+), 23 deletions(-) diff --git a/src/ingest_validation_tools/schema_loader.py b/src/ingest_validation_tools/schema_loader.py index 4c7d634c1..44baba9da 100644 --- a/src/ingest_validation_tools/schema_loader.py +++ b/src/ingest_validation_tools/schema_loader.py @@ -260,11 +260,9 @@ def get_directory_schema( return schema -def get_possible_directory_schemas(dir_schema: str) -> Optional[List]: - schemas = [] - # TODO: check formatting of minor versions and tailor better; - # deal with whole number versions (e.g. if there is a v2, v2-1, and v2-2, - # is v2 properly v2.0 or is it the most current?) 
+def get_possible_directory_schemas(dir_schema: str) -> Optional[Dict]: + schemas = {} + # this assumes that versions are numbered starting at x.0, no whole numbers directory_schema_minor_versions = sorted( _directory_schemas_path.glob(f"{dir_schema}*.yaml"), reverse=True ) @@ -273,7 +271,7 @@ def get_possible_directory_schemas(dir_schema: str) -> Optional[List]: for directory_schema_path in directory_schema_minor_versions: schema = load_yaml(directory_schema_path) schema["files"] += [] - schemas.append(schema) + schemas[Path(directory_schema_path).stem] = schema return schemas diff --git a/src/ingest_validation_tools/upload.py b/src/ingest_validation_tools/upload.py index d424de8ac..a63ee6081 100644 --- a/src/ingest_validation_tools/upload.py +++ b/src/ingest_validation_tools/upload.py @@ -130,12 +130,14 @@ def get_info(self) -> dict: stderr=subprocess.STDOUT, ).strip() + # If called before get_errors, will report dir schema major version only + try: effective_tsvs = { Path(path).name: { "Schema": sv.table_schema, "Metadata schema version": sv.version, - "Directory schema versions": sv.dir_schema, + "Directory schema version": sv.dir_schema, } for path, sv in self.effective_tsv_paths.items() } @@ -649,9 +651,10 @@ def _check_data_path( schema_version.dir_schema, data_path, dataset_ignore_globs=self.dataset_ignore_globs, - ) - if ref_errors: + ).popitem() + if type(ref_errors[0]) is list: errors[f"{str(metadata_path)}, column 'data_path', value '{path_value}'"] = ref_errors + schema_version.dir_schema = ref_errors[0] return errors def _check_other_path(self, metadata_path: Path, other_path_value: str, path_type: str): diff --git a/src/ingest_validation_tools/validation_utils.py b/src/ingest_validation_tools/validation_utils.py index 0a2d31261..263252984 100644 --- a/src/ingest_validation_tools/validation_utils.py +++ b/src/ingest_validation_tools/validation_utils.py @@ -156,7 +156,7 @@ def get_data_dir_errors( dir_schema: str, data_path: Path, dataset_ignore_globs: 
List[str] = [], -) -> Optional[dict]: +) -> Dict[str, Union[List[str], str]]: """ Validate a single data_path. """ @@ -166,12 +166,13 @@ def get_data_dir_errors( return {"Undefined directory schema": dir_schema} # Collect errors, discard if schema validates against a minor version - errors = defaultdict(list) + errors = [] - for schema in possible_schemas: + for schema_name, schema in possible_schemas.items(): + schema_errors = defaultdict(list) schema_warning_fields = [field for field in schema if field in ["deprecated", "draft"]] schema_warning = ( - f"{schema_warning_fields[0].title()} directory schema: {dir_schema}" + f"{schema_warning_fields[0].title()} directory schema: {schema_name}" if schema_warning_fields else None ) @@ -183,21 +184,25 @@ def get_data_dir_errors( except DirectoryValidationErrors as e: # If there are DirectoryValidationErrors and the schema is deprecated/draft... # schema deprecation/draft status is more important. - errors[f"{data_path} (as {dir_schema})"].append(e.errors) + schema_errors[f"{data_path} (as {schema_name})"].append(e.errors) if schema_warning: - errors[f"{data_path} (as {dir_schema})"].append(schema_warning) + schema_errors[f"{data_path} (as {schema_name})"].append(schema_warning) + errors.append(schema_errors) continue except OSError as e: # If there are OSErrors and the schema is deprecated/draft... # the OSErrors are more important. - errors[f"{data_path} (as {dir_schema})"].append(f"{e.strerror}: {e.filename}") + schema_errors[f"{data_path} (as {schema_name})"].append(f"{e.strerror}: {e.filename}") if schema_warning: - errors[f"{data_path} (as {dir_schema})"].append(schema_warning) + schema_errors[f"{data_path} (as {schema_name})"].append(schema_warning) + if schema_errors: + errors.append(schema_errors) + continue + # Found a schema with no problems! + return {schema_name: "No errors!"} if errors: - return dict(errors) - - # No problems! 
- return None + return errors[0] + return {str(data_path): f"Unknown error validating directory schema for {data_path}"} def get_context_of_decode_error(e: UnicodeDecodeError) -> str: diff --git a/tests-manual/update_test_data.py b/tests-manual/update_test_data.py index 4eba0e345..5bf40b1bc 100644 --- a/tests-manual/update_test_data.py +++ b/tests-manual/update_test_data.py @@ -60,8 +60,8 @@ def update_test_data(self) -> Dict[str, List]: **self.opts, verbose=self.upload_verbose, ) - info = self.upload.get_info() errors = self.upload.get_errors() + info = self.upload.get_info() report = ErrorReport(info=info, errors=errors) if "Too Many Requests" in report.as_md(): raise Exception( diff --git a/tests/test_dataset_examples.py b/tests/test_dataset_examples.py index 55f5f090b..dd6148899 100644 --- a/tests/test_dataset_examples.py +++ b/tests/test_dataset_examples.py @@ -39,8 +39,8 @@ def dataset_test(test_dir: str, dataset_opts: Dict, verbose: bool = False): print(f"Testing {test_dir}...") readme = open(f"{test_dir}/README.md", "r") upload = Upload(Path(f"{test_dir}/upload"), **dataset_opts) - info = upload.get_info() errors = upload.get_errors() + info = upload.get_info() report = ErrorReport(info=info, errors=errors) diff_test(test_dir, readme, clean_report(report), verbose=verbose) if "PreflightError" in report.as_md(): @@ -237,3 +237,44 @@ def multi_dataset_assert(self, tsv_paths: List[str], mock_assaytype_data: Mock): except AssertionError as e: print(e) self.errors.append(e) + + def prep_upload(self, test_dir: str, opts: Dict): + with patch( + "ingest_validation_tools.validation_utils.get_assaytype_data", + side_effect=lambda row, ingest_url: _assaytype_side_effect(test_dir, row, ingest_url), + ): + with patch( + "ingest_validation_tools.upload.Upload.online_checks", + side_effect=lambda tsv_path, schema_name, report_type: _online_side_effect( + schema_name, test_dir, tsv_path, report_type + ), + ): + upload = Upload(Path(f"{test_dir}/upload"), **opts) + 
upload.get_errors() + return upload + + # @patch( + # "ingest_validation_tools.schema_loader.get_possible_directory_schemas", + # {"test-schema-v1.0": {}, "test-schema-v1.1": {}}, + # ) + # def test_data_dir_versions_highest_version(self): + # # pick 1 good and 1 bad example dir; assert names (or numbers) of effective TSVs inside + # test_dirs = [] + # for test_dir in test_dirs: + # upload = self.prep_upload(test_dir, DATASET_EXAMPLES_OPTS) + # dir_schemas = upload.get_dir_schema_versions() + # expected_result = {upload.effective_tsv_paths.popitem()[0]: "test-schema-v1.1"} + # self.assertEqual(dir_schemas, expected_result) + # + # @patch( + # "ingest_validation_tools.schema_loader.get_possible_directory_schemas", + # {"test-schema-v1.0": {}, "test-schema-v1.1": {}}, + # ) + # def test_data_dir_versions_lower_version(self): + # # pick 1 good and 1 bad example dir; assert names (or numbers) of effective TSVs inside + # test_dirs = [] + # for test_dir in test_dirs: + # upload = self.prep_upload(test_dir, DATASET_EXAMPLES_OPTS) + # dir_schemas = upload.get_dir_schema_versions() + # expected_result = {upload.effective_tsv_paths.popitem()[0]: "test-schema-v1.0"} + # self.assertEqual(dir_schemas, expected_result) From 9b173d24c78bbf2beccd3bae1f704c8c260c21bf Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 21 Mar 2024 14:57:22 -0400 Subject: [PATCH 06/16] updated readmes --- .../bad-cedar-dir-histology/README.md | 11 +++++------ .../README.md | 5 ++--- .../README.md | 8 +++----- examples/dataset-examples/bad-codex-data/README.md | 6 +++--- examples/dataset-examples/bad-missing-data/README.md | 6 +++--- .../dataset-examples/bad-scatacseq-data/README.md | 6 +++--- examples/dataset-examples/bad-scrnaseq-v0/README.md | 5 ++--- examples/dataset-examples/bad-tsv-formats/README.md | 6 +++--- .../dataset-examples/good-cedar-histology/README.md | 4 ++-- .../good-cedar-multi-assay-visium/README.md | 6 +++--- .../README.md | 4 ++-- .../good-codex-akoya-metadata-v1/README.md 
| 4 ++-- examples/dataset-examples/good-maldiims/README.md | 4 ++-- .../good-scatacseq-metadata-v0/README.md | 4 ++-- .../good-scatacseq-metadata-v1/README.md | 4 ++-- examples/dataset-iec-examples/bad-example/README.md | 7 +++---- examples/dataset-iec-examples/good-example/README.md | 4 ++-- src/ingest_validation_tools/upload.py | 6 ++++-- src/ingest_validation_tools/validation_utils.py | 8 ++++---- 19 files changed, 52 insertions(+), 56 deletions(-) diff --git a/examples/dataset-examples/bad-cedar-dir-histology/README.md b/examples/dataset-examples/bad-cedar-dir-histology/README.md index 4baa2f8b9..a4977e423 100644 --- a/examples/dataset-examples/bad-cedar-dir-histology/README.md +++ b/examples/dataset-examples/bad-cedar-dir-histology/README.md @@ -2,12 +2,11 @@ Upload Errors: Directory Errors: ? examples/dataset-examples/bad-cedar-dir-histology/upload/bad-histology-metadata.tsv, - column 'data_path', value './dataset-1' - : examples/dataset-examples/bad-cedar-dir-histology/upload/dataset-1 (as histology-v2): - - 'No such file or directory: examples/dataset-examples/bad-cedar-dir-histology/upload/dataset-1' - examples/dataset-examples/bad-cedar-dir-histology/upload/bad-histology-metadata.tsv, column 'data_path', value './wrong': - examples/dataset-examples/bad-cedar-dir-histology/upload/wrong (as histology-v2): - - Not allowed: + column 'data_path', value './dataset-1' (as histology-v2) + : - 'No such file or directory: examples/dataset-examples/bad-cedar-dir-histology/upload/dataset-1' + ? examples/dataset-examples/bad-cedar-dir-histology/upload/bad-histology-metadata.tsv, + column 'data_path', value './wrong' (as histology-v2) + : - Not allowed: - not-allowed. Required but missing: - extras\/.*. 
diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md index 9eea1215f..71687dba7 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md @@ -2,9 +2,8 @@ Upload Errors: Directory Errors: ? examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/upload/good-visium-assay-metadata.tsv, - column 'data_path', value './Visium_9OLC_A4_S1' - : examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/upload/Visium_9OLC_A4_S1 (as visium-no-probes-v2): - - Required but missing: + column 'data_path', value './Visium_9OLC_A4_S1' (as visium-no-probes-v2) + : - Required but missing: - lab_processed\/.*. - lab_processed\/images\/.*. - lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv. diff --git a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md index b49faaddc..838f0a2c8 100644 --- a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md +++ b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md @@ -2,11 +2,9 @@ Upload Errors: Directory Errors: ? examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/upload/name-just-needs-to-end-with-metadata.tsv, - column 'data_path', value 'dataset-1' - : ? examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/upload/dataset-1 - (as codex-v1-with-dataset-json) - : - Required but missing: - - (raw|src_[^/]*)/dataset\.json. + column 'data_path', value 'dataset-1' (as codex-v1-with-dataset-json) + : - Required but missing: + - (raw|src_[^/]*)/dataset\.json. 
Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' diff --git a/examples/dataset-examples/bad-codex-data/README.md b/examples/dataset-examples/bad-codex-data/README.md index e00df1c20..737eeb04e 100644 --- a/examples/dataset-examples/bad-codex-data/README.md +++ b/examples/dataset-examples/bad-codex-data/README.md @@ -20,9 +20,9 @@ Upload Errors: - "Decode Error: Invalid ascii because ordinal not in range(128): \"mber\tconjugated_tag\n\ \ [ \xF0 ] \x9F\x98\x83\t\tbad-value\t\t\tinv\"." Directory Errors: - examples/dataset-examples/bad-codex-data/upload/codex-metadata.tsv, column 'data_path', value 'dataset-1': - examples/dataset-examples/bad-codex-data/upload/dataset-1 (as codex-v1-with-dataset-json): - - Not allowed: + ? examples/dataset-examples/bad-codex-data/upload/codex-metadata.tsv, column 'data_path', + value 'dataset-1' (as codex-v1-with-dataset-json) + : - Not allowed: - channelnames.txt. - cyc002_reg001_200216_112537/bad. - experiment.json. diff --git a/examples/dataset-examples/bad-missing-data/README.md b/examples/dataset-examples/bad-missing-data/README.md index 6b747cf3b..9d71f2529 100644 --- a/examples/dataset-examples/bad-missing-data/README.md +++ b/examples/dataset-examples/bad-missing-data/README.md @@ -8,9 +8,9 @@ Upload Errors: 'antibodies_path', value 'antibodies-missing.tsv' : - 'File does not exist: examples/dataset-examples/bad-missing-data/upload/antibodies-missing.tsv.' Directory Errors: - examples/dataset-examples/bad-missing-data/upload/codex-metadata.tsv, column 'data_path', value 'dataset-1': - examples/dataset-examples/bad-missing-data/upload/dataset-1 (as codex-v1-with-dataset-json): - - 'No such file or directory: examples/dataset-examples/bad-missing-data/upload/dataset-1' + ? 
examples/dataset-examples/bad-missing-data/upload/codex-metadata.tsv, column + 'data_path', value 'dataset-1' (as codex-v1-with-dataset-json) + : - 'No such file or directory: examples/dataset-examples/bad-missing-data/upload/dataset-1' Metadata TSV Validation Errors: Local Validation Errors: examples/dataset-examples/bad-missing-data/upload/codex-metadata.tsv (as codex-v0): diff --git a/examples/dataset-examples/bad-scatacseq-data/README.md b/examples/dataset-examples/bad-scatacseq-data/README.md index f95ad1842..4d42fa5ca 100644 --- a/examples/dataset-examples/bad-scatacseq-data/README.md +++ b/examples/dataset-examples/bad-scatacseq-data/README.md @@ -4,9 +4,9 @@ Upload Errors: examples/dataset-examples/bad-scatacseq-data/upload/scatacseq-metadata.tsv, column 'contributors_path', value '.': - 'Expected a TSV, but found a directory: examples/dataset-examples/bad-scatacseq-data/upload.' Directory Errors: - examples/dataset-examples/bad-scatacseq-data/upload/scatacseq-metadata.tsv, column 'data_path', value 'dataset-1': - examples/dataset-examples/bad-scatacseq-data/upload/dataset-1 (as scatacseq-v0): - - Not allowed: + ? examples/dataset-examples/bad-scatacseq-data/upload/scatacseq-metadata.tsv, + column 'data_path', value 'dataset-1' (as scatacseq-v0) + : - Not allowed: - not-the-file-you-are-looking-for.txt. - unexpected-directory/place-holder.txt. Required but missing: diff --git a/examples/dataset-examples/bad-scrnaseq-v0/README.md b/examples/dataset-examples/bad-scrnaseq-v0/README.md index 6db64a925..94d29986a 100644 --- a/examples/dataset-examples/bad-scrnaseq-v0/README.md +++ b/examples/dataset-examples/bad-scrnaseq-v0/README.md @@ -1,9 +1,8 @@ ``` Upload Errors: Directory Errors: - examples/dataset-examples/bad-scrnaseq-v0/upload/metadata.tsv, column 'data_path', value 'data': - examples/dataset-examples/bad-scrnaseq-v0/upload/data (as scrnaseq-v0): - - 'No such file or directory: examples/dataset-examples/bad-scrnaseq-v0/upload/data.' 
+ examples/dataset-examples/bad-scrnaseq-v0/upload/metadata.tsv, column 'data_path', value 'data' (as scrnaseq-v0): + - 'No such file or directory: examples/dataset-examples/bad-scrnaseq-v0/upload/data.' Metadata TSV Validation Errors: Local Validation Errors: examples/dataset-examples/bad-scrnaseq-v0/upload/metadata.tsv (as scrnaseq-v0): diff --git a/examples/dataset-examples/bad-tsv-formats/README.md b/examples/dataset-examples/bad-tsv-formats/README.md index 7853b3307..a87df3a82 100644 --- a/examples/dataset-examples/bad-tsv-formats/README.md +++ b/examples/dataset-examples/bad-tsv-formats/README.md @@ -6,9 +6,9 @@ Upload Errors: examples/dataset-examples/bad-tsv-formats/upload/codex-metadata.tsv, column 'antibodies_path', value 'antibodies.tsv': - 'File does not exist: examples/dataset-examples/bad-tsv-formats/upload/antibodies.tsv.' Directory Errors: - examples/dataset-examples/bad-tsv-formats/upload/codex-metadata.tsv, column 'data_path', value 'dataset-1/': - examples/dataset-examples/bad-tsv-formats/upload/dataset-1 (as codex-v1-with-dataset-json): - - Not allowed: + ? examples/dataset-examples/bad-tsv-formats/upload/codex-metadata.tsv, column + 'data_path', value 'dataset-1/' (as codex-v1-with-dataset-json) + : - Not allowed: - channelnames.txt. - cyc002_reg001_200216_112537/1_00001_Z001_CH1.tif. - experiment.json. 
diff --git a/examples/dataset-examples/good-cedar-histology/README.md b/examples/dataset-examples/good-cedar-histology/README.md index 159f110bc..12aa61bfc 100644 --- a/examples/dataset-examples/good-cedar-histology/README.md +++ b/examples/dataset-examples/good-cedar-histology/README.md @@ -7,6 +7,6 @@ TSVs: validated-histology-metadata.tsv: Schema: h-and-e-v2 Metadata schema version: '2' - Directory schema versions: histology-v2 + Directory schema version: histology-v2 -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md index 56dc0e751..8eb325868 100644 --- a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md +++ b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md @@ -7,14 +7,14 @@ TSVs: good-visium-assay-metadata.tsv: Schema: visium-no-probes-v2 Metadata schema version: '2' - Directory schema versions: visium-no-probes-v2 + Directory schema version: visium-no-probes-v2 good-visium-histology-metadata.tsv: Schema: h-and-e-v2 Metadata schema version: '2' - Directory schema versions: histology-v2 + Directory schema version: histology-v2 good-visium-rnaseq-metadata.tsv: Schema: scRNAseq-10xGenomics-v3-v2 Metadata schema version: '2' - Directory schema versions: rnaseq-v2 + Directory schema version: rnaseq-v2 ``` \ No newline at end of file diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md b/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md index c93e4ca5c..b1c13f59a 100644 --- a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md +++ b/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md @@ -7,6 +7,6 @@ TSVs: name-just-needs-to-end-with-metadata.tsv: Schema: codex-v1 Metadata schema version: codex-v1 - Directory schema versions: codex-v1-with-dataset-json + 
Directory schema version: codex-v1-with-dataset-json -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md b/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md index f8ddb4fed..14144ea43 100644 --- a/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md +++ b/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md @@ -7,6 +7,6 @@ TSVs: name-just-needs-to-end-with-metadata.tsv: Schema: codex-v1 Metadata schema version: codex-v1 - Directory schema versions: codex-v1-with-dataset-json + Directory schema version: codex-v1-with-dataset-json -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-maldiims/README.md b/examples/dataset-examples/good-maldiims/README.md index 6f03e3923..15dd152bb 100644 --- a/examples/dataset-examples/good-maldiims/README.md +++ b/examples/dataset-examples/good-maldiims/README.md @@ -7,6 +7,6 @@ TSVs: metadata.tsv: Schema: ims-v0 Metadata schema version: ims-v0 - Directory schema versions: ims-v0 + Directory schema version: ims-v0 -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-scatacseq-metadata-v0/README.md b/examples/dataset-examples/good-scatacseq-metadata-v0/README.md index 434257af2..52abc4ede 100644 --- a/examples/dataset-examples/good-scatacseq-metadata-v0/README.md +++ b/examples/dataset-examples/good-scatacseq-metadata-v0/README.md @@ -7,6 +7,6 @@ TSVs: metadata.tsv: Schema: scatacseq-v0 Metadata schema version: scatacseq-v0 - Directory schema versions: scatacseq-v0 + Directory schema version: scatacseq-v0 -``` +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-scatacseq-metadata-v1/README.md b/examples/dataset-examples/good-scatacseq-metadata-v1/README.md index 0449297ba..37ab71405 100644 --- a/examples/dataset-examples/good-scatacseq-metadata-v1/README.md +++ b/examples/dataset-examples/good-scatacseq-metadata-v1/README.md @@ -7,6 +7,6 @@ TSVs: 
metadata.tsv: Schema: scatacseq-v1 Metadata schema version: scatacseq-v1 - Directory schema versions: scatacseq-v0 + Directory schema version: scatacseq-v0 -``` +``` \ No newline at end of file diff --git a/examples/dataset-iec-examples/bad-example/README.md b/examples/dataset-iec-examples/bad-example/README.md index cfe76d94e..85c50ceb8 100644 --- a/examples/dataset-iec-examples/bad-example/README.md +++ b/examples/dataset-iec-examples/bad-example/README.md @@ -4,10 +4,9 @@ Upload Errors: examples/dataset-iec-examples/bad-example/upload/metadata.tsv, column 'contributors_path', value 'extras/contributors.tsv': Schema version is deprecated: contributors-v0 Directory Errors: - examples/dataset-iec-examples/bad-example/upload/metadata.tsv, column 'data_path', value '.': - examples/dataset-iec-examples/bad-example/upload (as scatacseq-v0): - - Not allowed: - - should-not-be-here.txt. + examples/dataset-iec-examples/bad-example/upload/metadata.tsv, column 'data_path', value '.' (as scatacseq-v0): + - Not allowed: + - should-not-be-here.txt. 
Metadata TSV Validation Errors: Local Validation Errors: examples/dataset-iec-examples/bad-example/upload/metadata.tsv (as scatacseq-v0): diff --git a/examples/dataset-iec-examples/good-example/README.md b/examples/dataset-iec-examples/good-example/README.md index 83fa010e4..cffa46dbd 100644 --- a/examples/dataset-iec-examples/good-example/README.md +++ b/examples/dataset-iec-examples/good-example/README.md @@ -7,6 +7,6 @@ TSVs: metadata.tsv: Schema: scatacseq-v0 Metadata schema version: scatacseq-v0 - Directory schema versions: scatacseq-v0 + Directory schema version: scatacseq-v0 -``` +``` \ No newline at end of file diff --git a/src/ingest_validation_tools/upload.py b/src/ingest_validation_tools/upload.py index a63ee6081..109a01e86 100644 --- a/src/ingest_validation_tools/upload.py +++ b/src/ingest_validation_tools/upload.py @@ -652,8 +652,10 @@ def _check_data_path( data_path, dataset_ignore_globs=self.dataset_ignore_globs, ).popitem() - if type(ref_errors[0]) is list: - errors[f"{str(metadata_path)}, column 'data_path', value '{path_value}'"] = ref_errors + if type(ref_errors[1]) is list: + errors[ + f"{str(metadata_path)}, column 'data_path', value '{path_value}' (as {Path(ref_errors[0]).stem})" + ] = ref_errors[1] schema_version.dir_schema = ref_errors[0] return errors diff --git a/src/ingest_validation_tools/validation_utils.py b/src/ingest_validation_tools/validation_utils.py index 263252984..9799e7258 100644 --- a/src/ingest_validation_tools/validation_utils.py +++ b/src/ingest_validation_tools/validation_utils.py @@ -184,17 +184,17 @@ def get_data_dir_errors( except DirectoryValidationErrors as e: # If there are DirectoryValidationErrors and the schema is deprecated/draft... # schema deprecation/draft status is more important. 
- schema_errors[f"{data_path} (as {schema_name})"].append(e.errors) + schema_errors[schema_name].append(e.errors) if schema_warning: - schema_errors[f"{data_path} (as {schema_name})"].append(schema_warning) + schema_errors[schema_name].append(schema_warning) errors.append(schema_errors) continue except OSError as e: # If there are OSErrors and the schema is deprecated/draft... # the OSErrors are more important. - schema_errors[f"{data_path} (as {schema_name})"].append(f"{e.strerror}: {e.filename}") + schema_errors[schema_name].append(f"{e.strerror}: {e.filename}") if schema_warning: - schema_errors[f"{data_path} (as {schema_name})"].append(schema_warning) + schema_errors[schema_name].append(schema_warning) if schema_errors: errors.append(schema_errors) continue From ffe9af904a71074b1eee22b7691c9ab8f37b333d Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 21 Mar 2024 15:05:03 -0400 Subject: [PATCH 07/16] made sorting more reliable --- src/ingest_validation_tools/schema_loader.py | 4 +--- src/ingest_validation_tools/validation_utils.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ingest_validation_tools/schema_loader.py b/src/ingest_validation_tools/schema_loader.py index 44baba9da..2191cc19e 100644 --- a/src/ingest_validation_tools/schema_loader.py +++ b/src/ingest_validation_tools/schema_loader.py @@ -263,9 +263,7 @@ def get_directory_schema( def get_possible_directory_schemas(dir_schema: str) -> Optional[Dict]: schemas = {} # this assumes that versions are numbered starting at x.0, no whole numbers - directory_schema_minor_versions = sorted( - _directory_schemas_path.glob(f"{dir_schema}*.yaml"), reverse=True - ) + directory_schema_minor_versions = _directory_schemas_path.glob(f"{dir_schema}*.yaml") if not directory_schema_minor_versions: return None for directory_schema_path in directory_schema_minor_versions: diff --git a/src/ingest_validation_tools/validation_utils.py b/src/ingest_validation_tools/validation_utils.py index 
9799e7258..b4375313e 100644 --- a/src/ingest_validation_tools/validation_utils.py +++ b/src/ingest_validation_tools/validation_utils.py @@ -168,7 +168,8 @@ def get_data_dir_errors( # Collect errors, discard if schema validates against a minor version errors = [] - for schema_name, schema in possible_schemas.items(): + # Make sure possible_schemas is sorted by key (descending) to evaluate highest minor version first + for schema_name, schema in sorted(possible_schemas.items(), reverse=True): schema_errors = defaultdict(list) schema_warning_fields = [field for field in schema if field in ["deprecated", "draft"]] schema_warning = ( From 059898ba3683e9dec249d58b8c2a73587491b6b1 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 25 Mar 2024 12:22:17 -0400 Subject: [PATCH 08/16] changed dir schema names, docs broken --- docs/af/current/index.md | 34 ------------------ docs/af/deprecated/index.md | 35 ++++++++++++++++++- docs/bodyct/current/index.md | 8 ----- docs/bodyct/deprecated/index.md | 8 +++++ ...ultiome-v2.yaml => 10x-multiome-v2.0.yaml} | 0 .../{af-v0.yaml => af-v0.0.yaml} | 0 .../{af-v2.yaml => af-v2.0.yaml} | 0 .../{atacseq-v2.yaml => atacseq-v2.0.yaml} | 0 .../{bodyct-v2.yaml => bodyct-v2.0.yaml} | 0 ...katacseq-v0.yaml => bulkatacseq-v0.0.yaml} | 0 .../directory-schemas/bulkrnaseq-v0.0.yaml | 1 + .../directory-schemas/bulkrnaseq-v0.yaml | 1 - .../{celldive-v0.yaml => celldive-v0.0.yaml} | 0 .../{celldive-v2.yaml => celldive-v2.0.yaml} | 0 .../{cems-v0.yaml => cems-v0.0.yaml} | 0 .../{codex-v0.yaml => codex-v0.0.yaml} | 0 .../{codex-v2.yaml => codex-v2.0.yaml} | 0 .../{confocal-v2.yaml => confocal-v2.0.yaml} | 0 .../{cosmx-v2.yaml => cosmx-v2.0.yaml} | 0 .../{cycif-v2.yaml => cycif-v2.0.yaml} | 0 .../{dbit-v2.yaml => dbit-v2.0.yaml} | 0 .../{desi-v2.yaml => desi-v2.0.yaml} | 0 ...ced-srs-v2.yaml => enhanced-srs-v2.0.yaml} | 0 .../{fastq-v0.yaml => fastq-v0.0.yaml} | 0 .../{gcms-v0.yaml => gcms-v0.0.yaml} | 0 ...unter-v2.yaml => geomx-ncounter-v2.0.yaml} 
| 0 ...{geomx-ngs-v2.yaml => geomx-ngs-v2.0.yaml} | 0 .../{geomx-v0.yaml => geomx-v0.0.yaml} | 0 ...ifi-slide-v2.yaml => hifi-slide-v2.0.yaml} | 0 ...{histology-v2.yaml => histology-v2.0.yaml} | 0 .../{imc-2d-v2.yaml => imc-2d-v2.0.yaml} | 0 .../{imc-v0.yaml => imc-v0.0.yaml} | 0 .../{imc3d-v0.yaml => imc3d-v0.0.yaml} | 0 .../{ims-v0.yaml => ims-v0.0.yaml} | 0 .../{lcms-v0.yaml => lcms-v0.0.yaml} | 0 .../{lcms-v2.yaml => lcms-v2.0.yaml} | 0 ...ightsheet-v0.yaml => lightsheet-v0.0.yaml} | 0 ...ightsheet-v1.yaml => lightsheet-v1.0.yaml} | 0 ...ightsheet-v2.yaml => lightsheet-v2.0.yaml} | 0 .../{maldi-v2.yaml => maldi-v2.0.yaml} | 0 .../{mc-v2.yaml => mc-v2.0.yaml} | 0 .../{merfish-v2.yaml => merfish-v2.0.yaml} | 0 .../{mibi-v0.yaml => mibi-v0.0.yaml} | 0 .../{mibi-v2.yaml => mibi-v2.0.yaml} | 0 .../{microct-v2.yaml => microct-v2.0.yaml} | 0 .../{mri-v2.yaml => mri-v2.0.yaml} | 0 .../{mxif-v0.yaml => mxif-v0.0.yaml} | 0 ...o-splits-v2.yaml => nano-splits-v2.0.yaml} | 0 .../{nano-v0.yaml => nano-v0.0.yaml} | 0 .../{oct-v2.yaml => oct-v2.0.yaml} | 0 ...nocycler-v2.yaml => phenocycler-v2.0.yaml} | 0 ...lication-v0.yaml => publication-v0.0.yaml} | 0 ...lication-v2.yaml => publication-v2.0.yaml} | 0 ...q-geomx-v2.yaml => rnaseq-geomx-v2.0.yaml} | 0 .../{rnaseq-v2.yaml => rnaseq-v2.0.yaml} | 0 ...s-v2.yaml => rnaseq-with-probes-v2.0.yaml} | 0 .../directory-schemas/scatacseq-v0.0.yaml | 1 + .../directory-schemas/scatacseq-v0.yaml | 1 - ...{scatacseq-v2.yaml => scatacseq-v2.0.yaml} | 0 .../directory-schemas/scrnaseq-hca-v0.0.yaml | 1 + .../directory-schemas/scrnaseq-hca-v0.yaml | 1 - ...seq-hca-v2.yaml => scrnaseq-hca-v2.0.yaml} | 0 .../directory-schemas/scrnaseq-v0.0.yaml | 1 + .../directory-schemas/scrnaseq-v0.yaml | 1 - ...l => second-harmonic-generation-v2.0.yaml} | 0 ...sk-v2.yaml => segmentation-mask-v2.0.yaml} | 0 .../{seqfish-v0.yaml => seqfish-v0.0.yaml} | 0 .../{sims-v2.yaml => sims-v2.0.yaml} | 0 .../{slideseq-v0.yaml => slideseq-v0.0.yaml} | 0 
...{snareseq2-v2.yaml => snareseq2-v2.0.yaml} | 0 .../{stained-v0.yaml => stained-v0.0.yaml} | 0 .../{stained-v1.yaml => stained-v1.0.yaml} | 0 ... thick-section-multiphoton-mxif-v2.0.yaml} | 0 ...ltrasound-v2.yaml => ultrasound-v2.0.yaml} | 0 ...bes-v2.yaml => visium-no-probes-v2.0.yaml} | 0 ...s-v2.yaml => visium-with-probes-v2.0.yaml} | 0 .../{wgs-v0.yaml => wgs-v0.0.yaml} | 0 .../{wgs-v2.yaml => wgs-v2.0.yaml} | 0 .../{xenium-v2.yaml => xenium-v2.0.yaml} | 0 79 files changed, 46 insertions(+), 47 deletions(-) rename src/ingest_validation_tools/directory-schemas/{10x-multiome-v2.yaml => 10x-multiome-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{af-v0.yaml => af-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{af-v2.yaml => af-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{atacseq-v2.yaml => atacseq-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{bodyct-v2.yaml => bodyct-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{bulkatacseq-v0.yaml => bulkatacseq-v0.0.yaml} (100%) create mode 120000 src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.0.yaml delete mode 120000 src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.yaml rename src/ingest_validation_tools/directory-schemas/{celldive-v0.yaml => celldive-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{celldive-v2.yaml => celldive-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{cems-v0.yaml => cems-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{codex-v0.yaml => codex-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{codex-v2.yaml => codex-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{confocal-v2.yaml => confocal-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{cosmx-v2.yaml => cosmx-v2.0.yaml} (100%) rename 
src/ingest_validation_tools/directory-schemas/{cycif-v2.yaml => cycif-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{dbit-v2.yaml => dbit-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{desi-v2.yaml => desi-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{enhanced-srs-v2.yaml => enhanced-srs-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{fastq-v0.yaml => fastq-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{gcms-v0.yaml => gcms-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{geomx-ncounter-v2.yaml => geomx-ncounter-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{geomx-ngs-v2.yaml => geomx-ngs-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{geomx-v0.yaml => geomx-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{hifi-slide-v2.yaml => hifi-slide-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{histology-v2.yaml => histology-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{imc-2d-v2.yaml => imc-2d-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{imc-v0.yaml => imc-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{imc3d-v0.yaml => imc3d-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{ims-v0.yaml => ims-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{lcms-v0.yaml => lcms-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{lcms-v2.yaml => lcms-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{lightsheet-v0.yaml => lightsheet-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{lightsheet-v1.yaml => lightsheet-v1.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{lightsheet-v2.yaml => lightsheet-v2.0.yaml} (100%) rename 
src/ingest_validation_tools/directory-schemas/{maldi-v2.yaml => maldi-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{mc-v2.yaml => mc-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{merfish-v2.yaml => merfish-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{mibi-v0.yaml => mibi-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{mibi-v2.yaml => mibi-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{microct-v2.yaml => microct-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{mri-v2.yaml => mri-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{mxif-v0.yaml => mxif-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{nano-splits-v2.yaml => nano-splits-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{nano-v0.yaml => nano-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{oct-v2.yaml => oct-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{phenocycler-v2.yaml => phenocycler-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{publication-v0.yaml => publication-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{publication-v2.yaml => publication-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{rnaseq-geomx-v2.yaml => rnaseq-geomx-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{rnaseq-v2.yaml => rnaseq-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{rnaseq-with-probes-v2.yaml => rnaseq-with-probes-v2.0.yaml} (100%) create mode 120000 src/ingest_validation_tools/directory-schemas/scatacseq-v0.0.yaml delete mode 120000 src/ingest_validation_tools/directory-schemas/scatacseq-v0.yaml rename src/ingest_validation_tools/directory-schemas/{scatacseq-v2.yaml => scatacseq-v2.0.yaml} (100%) create mode 120000 
src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.0.yaml delete mode 120000 src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.yaml rename src/ingest_validation_tools/directory-schemas/{scrnaseq-hca-v2.yaml => scrnaseq-hca-v2.0.yaml} (100%) create mode 120000 src/ingest_validation_tools/directory-schemas/scrnaseq-v0.0.yaml delete mode 120000 src/ingest_validation_tools/directory-schemas/scrnaseq-v0.yaml rename src/ingest_validation_tools/directory-schemas/{second-harmonic-generation-v2.yaml => second-harmonic-generation-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{segmentation-mask-v2.yaml => segmentation-mask-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{seqfish-v0.yaml => seqfish-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{sims-v2.yaml => sims-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{slideseq-v0.yaml => slideseq-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{snareseq2-v2.yaml => snareseq2-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{stained-v0.yaml => stained-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{stained-v1.yaml => stained-v1.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{thick-section-multiphoton-mxif-v2.yaml => thick-section-multiphoton-mxif-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{ultrasound-v2.yaml => ultrasound-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{visium-no-probes-v2.yaml => visium-no-probes-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{visium-with-probes-v2.yaml => visium-with-probes-v2.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{wgs-v0.yaml => wgs-v0.0.yaml} (100%) rename src/ingest_validation_tools/directory-schemas/{wgs-v2.yaml => wgs-v2.0.yaml} (100%) rename 
src/ingest_validation_tools/directory-schemas/{xenium-v2.yaml => xenium-v2.0.yaml} (100%) diff --git a/docs/af/current/index.md b/docs/af/current/index.md index bc19a39b7..acea5a873 100644 --- a/docs/af/current/index.md +++ b/docs/af/current/index.md @@ -27,37 +27,3 @@ This schema is for autofluorescence (AF). For an example of an AF dataset & dire
-## Directory schemas -Version 2 (use this one) - -| pattern | required? | description | dependent on | -| --- | --- | --- | --- | -| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | | -| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | -| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | -| raw\/.* | ✓ | Raw data files for the experiment. | | -| raw\/channel_layout\.tsv | ✓ | Table that includes a dictionary for channel to moiety, which may be a protein given in an OMAP panel or captured in the ASCT+B table. | | -| raw\/images\/.* | ✓ | Raw image files. Using this subdirectory allows for harmonization with other imaging assays. [This directory must include at least one raw file.] | | -| raw\/images\/[^\/]+\.(?:xml|nd2|oir|lif|czi|tiff) | ✓ | Raw microscope file for the experiment | | -| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | | -| lab_processed\/images\/.* | ✓ | Processed image files | | -| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. See the following link for the set of fields that are required in the OME TIFF file XML header. | | -| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accommpanying OME TIFF. The file should contain one row per OME TIFF channel. 
The required fields are detailed | | -| lab_processed\/transformations\/.* | | This directory contains transformation matrices that capture how each modality is aligned with the other and can be used to visualize overlays of multimodal data. This is needed to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). In these cases data type may have different pixel sizes and slightly different orientations (i.e., one may be rotated relative to another). | | -| lab_processed\/transformations\/[^\/]+\.txt | | Transformation matrices used to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). | | -| qa_qc\/.* | ✓ | Directory containing QA and/or QC information. | | -| qa_qc\/resolution_report\/.* | ✓ | Directory containing the results of resolution tests and/or vendor preventative maintenance reports. | | -| qa_qc\/resolution_report\/resolution\.txt | | This file summarizes the results of resolution tests or vendor reports from preventative maintenance visits. | | -| qa_qc\/resolution_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or resolution check tool demonstrating resolution. This file may include illumination test results. | | -| qa_qc\/illumination_report\/.* | ✓ | Directory containing the results of illumination tests and/or vendor preventative maintenance reports. | | -| qa_qc\/illumination_report\/illumination.txt | | This file summarizes the results of illumination tests or vendor reports from preventative maintenance visits. | | -| qa_qc\/illumination_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or illumination check tool demonstrating illumination intensity. | | -| lab_processed\/annotations\/.* | | Directory containing segmentation masks. 
| | -| lab_processed\/annotations\/[^\/]+\.segmentations\.ome\.tiff | | The segmentation masks should be stored as multi-channel pyramidal OME TIFF bitmasks with one channel per mask, where a single mask contains all instances of a type of object (e.g., all cells, a class of FTUs, etc). The class of objects contained in the mask is documented in the segmentation-masks.csv file. Each individual object in a mask should be represented by a unique integer pixel value starting at 1, with 0 meaning background (e.g., all pixels belonging to the first instance of a T-cell have a value of 1, the pixels for the second instance of a T-cell have a value of 2, etc). The pixel values should be unique within a mask. FTUs and other structural elements should be captured the same way as cells with segmentation masks and the appropriate channel feature definitions. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/segmentation-masks\.csv | | This file contains details about each mask, with one row per mask. Each column in this file contains details describing the mask (e.g., channel number, mask name, ontological ID, etc). Each mask is stored as a channel in the segmentations.ome.tiff file and the mask name should be ontologically based and linked to the ASCT+B table where possible. The number of rows in this file should equal the number of channels in the segmentations.ome.tiff. For example, one row in this file would ontologically describe cells, if the segmentations.ome.tiff file contained a mask of all cells. A minimum set of fields (required and optional) is included below. If multiple segmentations.ome.tiff files are used, this segmentation-masks.csv file should document the masks across all of the OME TIFF files. 
| lab_processed\/annotations\/.* | -| lab_processed\/annotations\/[^\/]+-objects\.csv | | This is a matrix where each row describes an individual object (e.g., one row per cell in the case where a mask contains all cells) and columns are features (i.e., object type, marker intensity, classification strategies, etc). One file should be created per mask with the name of the mask prepended to the file name. For example, if there’s a cell segmentation map called “cells” then you would include a file called “cells-objects.csv” and that file would contain one row per cell in the “cells” mask and one column per feature, such as marker intensity and/or cell type. A minimum set of fields (required and optional) is included below. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/[^\/]+\.geojson | | A GeoJSON file(s) containing the geometries of each object within a mask. For example, if the mask contains multiple FTUs, multiple cells, etc, each of the objects in the mask would be independently documented in the GeoJSON file. There would be a single GeoJSON file per mask and the name of the file should be the name of the mask. If this file is generated by QuPath, the coordinates will be in pixel units with the origin (0, 0) as the top left corner of the full-resolution image. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/tissue-boundary\.geojson | | **[QA/QC]** If the boundaries of the tissue have been identified (e.g., by manual efforts), then the boundary geometry can be included as a GeoJSON file named “tissue-boundary.geojson”. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/regions-of-concern\.csv | | This file and the associated GeoJSON file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. 
This file should contain one row per region and include documentation about the region and why it's being flagged. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/regions-of-concern\.geojson | | This file and the associated CSV file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. This file should contain the geometric coordinates of each region being flagged. | lab_processed\/annotations\/.* | - diff --git a/docs/af/deprecated/index.md b/docs/af/deprecated/index.md index 3736ba1d7..644c24212 100644 --- a/docs/af/deprecated/index.md +++ b/docs/af/deprecated/index.md @@ -527,7 +527,40 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 2.0 (use this one) + +| pattern | required? | description | dependent on | +| --- | --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | +| raw\/.* | ✓ | Raw data files for the experiment. | | +| raw\/channel_layout\.tsv | ✓ | Table that includes a dictionary for channel to moiety, which may be a protein given in an OMAP panel or captured in the ASCT+B table. | | +| raw\/images\/.* | ✓ | Raw image files. Using this subdirectory allows for harmonization with other imaging assays. [This directory must include at least one raw file.] | | +| raw\/images\/[^\/]+\.(?:xml|nd2|oir|lif|czi|tiff) | ✓ | Raw microscope file for the experiment | | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | | +| lab_processed\/images\/.* | ✓ | Processed image files | | +| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. See the following link for the set of fields that are required in the OME TIFF file XML header. | | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accommpanying OME TIFF. 
The file should contain one row per OME TIFF channel. The required fields are detailed | | +| lab_processed\/transformations\/.* | | This directory contains transformation matrices that capture how each modality is aligned with the other and can be used to visualize overlays of multimodal data. This is needed to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). In these cases data type may have different pixel sizes and slightly different orientations (i.e., one may be rotated relative to another). | | +| lab_processed\/transformations\/[^\/]+\.txt | | Transformation matrices used to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). | | +| qa_qc\/.* | ✓ | Directory containing QA and/or QC information. | | +| qa_qc\/resolution_report\/.* | ✓ | Directory containing the results of resolution tests and/or vendor preventative maintenance reports. | | +| qa_qc\/resolution_report\/resolution\.txt | | This file summarizes the results of resolution tests or vendor reports from preventative maintenance visits. | | +| qa_qc\/resolution_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or resolution check tool demonstrating resolution. This file may include illumination test results. | | +| qa_qc\/illumination_report\/.* | ✓ | Directory containing the results of illumination tests and/or vendor preventative maintenance reports. | | +| qa_qc\/illumination_report\/illumination.txt | | This file summarizes the results of illumination tests or vendor reports from preventative maintenance visits. | | +| qa_qc\/illumination_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or illumination check tool demonstrating illumination intensity. | | +| lab_processed\/annotations\/.* | | Directory containing segmentation masks. 
| | +| lab_processed\/annotations\/[^\/]+\.segmentations\.ome\.tiff | | The segmentation masks should be stored as multi-channel pyramidal OME TIFF bitmasks with one channel per mask, where a single mask contains all instances of a type of object (e.g., all cells, a class of FTUs, etc). The class of objects contained in the mask is documented in the segmentation-masks.csv file. Each individual object in a mask should be represented by a unique integer pixel value starting at 1, with 0 meaning background (e.g., all pixels belonging to the first instance of a T-cell have a value of 1, the pixels for the second instance of a T-cell have a value of 2, etc). The pixel values should be unique within a mask. FTUs and other structural elements should be captured the same way as cells with segmentation masks and the appropriate channel feature definitions. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/segmentation-masks\.csv | | This file contains details about each mask, with one row per mask. Each column in this file contains details describing the mask (e.g., channel number, mask name, ontological ID, etc). Each mask is stored as a channel in the segmentations.ome.tiff file and the mask name should be ontologically based and linked to the ASCT+B table where possible. The number of rows in this file should equal the number of channels in the segmentations.ome.tiff. For example, one row in this file would ontologically describe cells, if the segmentations.ome.tiff file contained a mask of all cells. A minimum set of fields (required and optional) is included below. If multiple segmentations.ome.tiff files are used, this segmentation-masks.csv file should document the masks across all of the OME TIFF files. 
| lab_processed\/annotations\/.* | +| lab_processed\/annotations\/[^\/]+-objects\.csv | | This is a matrix where each row describes an individual object (e.g., one row per cell in the case where a mask contains all cells) and columns are features (i.e., object type, marker intensity, classification strategies, etc). One file should be created per mask with the name of the mask prepended to the file name. For example, if there’s a cell segmentation map called “cells” then you would include a file called “cells-objects.csv” and that file would contain one row per cell in the “cells” mask and one column per feature, such as marker intensity and/or cell type. A minimum set of fields (required and optional) is included below. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/[^\/]+\.geojson | | A GeoJSON file(s) containing the geometries of each object within a mask. For example, if the mask contains multiple FTUs, multiple cells, etc, each of the objects in the mask would be independently documented in the GeoJSON file. There would be a single GeoJSON file per mask and the name of the file should be the name of the mask. If this file is generated by QuPath, the coordinates will be in pixel units with the origin (0, 0) as the top left corner of the full-resolution image. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/tissue-boundary\.geojson | | **[QA/QC]** If the boundaries of the tissue have been identified (e.g., by manual efforts), then the boundary geometry can be included as a GeoJSON file named “tissue-boundary.geojson”. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/regions-of-concern\.csv | | This file and the associated GeoJSON file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. 
This file should contain one row per region and include documentation about the region and why it's being flagged. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/regions-of-concern\.geojson | | This file and the associated CSV file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. This file should contain the geometric coordinates of each region being flagged. | lab_processed\/annotations\/.* | + +Version 0.0 | pattern | required? | description | | --- | --- | --- | diff --git a/docs/bodyct/current/index.md b/docs/bodyct/current/index.md index d27f448b4..a0c680fcd 100644 --- a/docs/bodyct/current/index.md +++ b/docs/bodyct/current/index.md @@ -23,11 +23,3 @@ Excel and TSV templates for this schema will be available when the draft next-ge
-## Directory schemas -Version 2 (use this one) - -| pattern | required? | description | -| --- | --- | --- | -| TODO | ✓ | Directory structure not yet specified. | -| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | - diff --git a/docs/bodyct/deprecated/index.md b/docs/bodyct/deprecated/index.md index d7382ec55..75b632416 100644 --- a/docs/bodyct/deprecated/index.md +++ b/docs/bodyct/deprecated/index.md @@ -370,3 +370,11 @@ Relative path to file or directory with instrument data. Downstream processing w
+## Directory schemas +Version 2.0 (use this one) + +| pattern | required? | description | +| --- | --- | --- | +| TODO | ✓ | Directory structure not yet specified. | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | + diff --git a/src/ingest_validation_tools/directory-schemas/10x-multiome-v2.yaml b/src/ingest_validation_tools/directory-schemas/10x-multiome-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/10x-multiome-v2.yaml rename to src/ingest_validation_tools/directory-schemas/10x-multiome-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/af-v0.yaml b/src/ingest_validation_tools/directory-schemas/af-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/af-v0.yaml rename to src/ingest_validation_tools/directory-schemas/af-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/af-v2.yaml b/src/ingest_validation_tools/directory-schemas/af-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/af-v2.yaml rename to src/ingest_validation_tools/directory-schemas/af-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/atacseq-v2.yaml b/src/ingest_validation_tools/directory-schemas/atacseq-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/atacseq-v2.yaml rename to src/ingest_validation_tools/directory-schemas/atacseq-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/bodyct-v2.yaml b/src/ingest_validation_tools/directory-schemas/bodyct-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/bodyct-v2.yaml rename to src/ingest_validation_tools/directory-schemas/bodyct-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/bulkatacseq-v0.yaml b/src/ingest_validation_tools/directory-schemas/bulkatacseq-v0.0.yaml similarity index 100% rename from 
src/ingest_validation_tools/directory-schemas/bulkatacseq-v0.yaml rename to src/ingest_validation_tools/directory-schemas/bulkatacseq-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.0.yaml b/src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.0.yaml new file mode 120000 index 000000000..c14abfab9 --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.0.yaml @@ -0,0 +1 @@ +fastq-v0.0.yaml \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.yaml b/src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.yaml deleted file mode 120000 index c25e75f1e..000000000 --- a/src/ingest_validation_tools/directory-schemas/bulkrnaseq-v0.yaml +++ /dev/null @@ -1 +0,0 @@ -fastq-v0.yaml \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/celldive-v0.yaml b/src/ingest_validation_tools/directory-schemas/celldive-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/celldive-v0.yaml rename to src/ingest_validation_tools/directory-schemas/celldive-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/celldive-v2.yaml b/src/ingest_validation_tools/directory-schemas/celldive-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/celldive-v2.yaml rename to src/ingest_validation_tools/directory-schemas/celldive-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/cems-v0.yaml b/src/ingest_validation_tools/directory-schemas/cems-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/cems-v0.yaml rename to src/ingest_validation_tools/directory-schemas/cems-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/codex-v0.yaml b/src/ingest_validation_tools/directory-schemas/codex-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/codex-v0.yaml rename to 
src/ingest_validation_tools/directory-schemas/codex-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/codex-v2.yaml b/src/ingest_validation_tools/directory-schemas/codex-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/codex-v2.yaml rename to src/ingest_validation_tools/directory-schemas/codex-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/confocal-v2.yaml b/src/ingest_validation_tools/directory-schemas/confocal-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/confocal-v2.yaml rename to src/ingest_validation_tools/directory-schemas/confocal-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/cosmx-v2.yaml b/src/ingest_validation_tools/directory-schemas/cosmx-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/cosmx-v2.yaml rename to src/ingest_validation_tools/directory-schemas/cosmx-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/cycif-v2.yaml b/src/ingest_validation_tools/directory-schemas/cycif-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/cycif-v2.yaml rename to src/ingest_validation_tools/directory-schemas/cycif-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/dbit-v2.yaml b/src/ingest_validation_tools/directory-schemas/dbit-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/dbit-v2.yaml rename to src/ingest_validation_tools/directory-schemas/dbit-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/desi-v2.yaml b/src/ingest_validation_tools/directory-schemas/desi-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/desi-v2.yaml rename to src/ingest_validation_tools/directory-schemas/desi-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/enhanced-srs-v2.yaml 
b/src/ingest_validation_tools/directory-schemas/enhanced-srs-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/enhanced-srs-v2.yaml rename to src/ingest_validation_tools/directory-schemas/enhanced-srs-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/fastq-v0.yaml b/src/ingest_validation_tools/directory-schemas/fastq-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/fastq-v0.yaml rename to src/ingest_validation_tools/directory-schemas/fastq-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/gcms-v0.yaml b/src/ingest_validation_tools/directory-schemas/gcms-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/gcms-v0.yaml rename to src/ingest_validation_tools/directory-schemas/gcms-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/geomx-ncounter-v2.yaml b/src/ingest_validation_tools/directory-schemas/geomx-ncounter-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/geomx-ncounter-v2.yaml rename to src/ingest_validation_tools/directory-schemas/geomx-ncounter-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/geomx-ngs-v2.yaml b/src/ingest_validation_tools/directory-schemas/geomx-ngs-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/geomx-ngs-v2.yaml rename to src/ingest_validation_tools/directory-schemas/geomx-ngs-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/geomx-v0.yaml b/src/ingest_validation_tools/directory-schemas/geomx-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/geomx-v0.yaml rename to src/ingest_validation_tools/directory-schemas/geomx-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/hifi-slide-v2.yaml b/src/ingest_validation_tools/directory-schemas/hifi-slide-v2.0.yaml similarity index 100% rename from 
src/ingest_validation_tools/directory-schemas/hifi-slide-v2.yaml rename to src/ingest_validation_tools/directory-schemas/hifi-slide-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/histology-v2.yaml b/src/ingest_validation_tools/directory-schemas/histology-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/histology-v2.yaml rename to src/ingest_validation_tools/directory-schemas/histology-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/imc-2d-v2.yaml b/src/ingest_validation_tools/directory-schemas/imc-2d-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/imc-2d-v2.yaml rename to src/ingest_validation_tools/directory-schemas/imc-2d-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/imc-v0.yaml b/src/ingest_validation_tools/directory-schemas/imc-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/imc-v0.yaml rename to src/ingest_validation_tools/directory-schemas/imc-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/imc3d-v0.yaml b/src/ingest_validation_tools/directory-schemas/imc3d-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/imc3d-v0.yaml rename to src/ingest_validation_tools/directory-schemas/imc3d-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/ims-v0.yaml b/src/ingest_validation_tools/directory-schemas/ims-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/ims-v0.yaml rename to src/ingest_validation_tools/directory-schemas/ims-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/lcms-v0.yaml b/src/ingest_validation_tools/directory-schemas/lcms-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/lcms-v0.yaml rename to src/ingest_validation_tools/directory-schemas/lcms-v0.0.yaml diff --git 
a/src/ingest_validation_tools/directory-schemas/lcms-v2.yaml b/src/ingest_validation_tools/directory-schemas/lcms-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/lcms-v2.yaml rename to src/ingest_validation_tools/directory-schemas/lcms-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/lightsheet-v0.yaml b/src/ingest_validation_tools/directory-schemas/lightsheet-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/lightsheet-v0.yaml rename to src/ingest_validation_tools/directory-schemas/lightsheet-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/lightsheet-v1.yaml b/src/ingest_validation_tools/directory-schemas/lightsheet-v1.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/lightsheet-v1.yaml rename to src/ingest_validation_tools/directory-schemas/lightsheet-v1.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/lightsheet-v2.yaml b/src/ingest_validation_tools/directory-schemas/lightsheet-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/lightsheet-v2.yaml rename to src/ingest_validation_tools/directory-schemas/lightsheet-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/maldi-v2.yaml b/src/ingest_validation_tools/directory-schemas/maldi-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/maldi-v2.yaml rename to src/ingest_validation_tools/directory-schemas/maldi-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/mc-v2.yaml b/src/ingest_validation_tools/directory-schemas/mc-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/mc-v2.yaml rename to src/ingest_validation_tools/directory-schemas/mc-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/merfish-v2.yaml b/src/ingest_validation_tools/directory-schemas/merfish-v2.0.yaml 
similarity index 100% rename from src/ingest_validation_tools/directory-schemas/merfish-v2.yaml rename to src/ingest_validation_tools/directory-schemas/merfish-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/mibi-v0.yaml b/src/ingest_validation_tools/directory-schemas/mibi-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/mibi-v0.yaml rename to src/ingest_validation_tools/directory-schemas/mibi-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/mibi-v2.yaml b/src/ingest_validation_tools/directory-schemas/mibi-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/mibi-v2.yaml rename to src/ingest_validation_tools/directory-schemas/mibi-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/microct-v2.yaml b/src/ingest_validation_tools/directory-schemas/microct-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/microct-v2.yaml rename to src/ingest_validation_tools/directory-schemas/microct-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/mri-v2.yaml b/src/ingest_validation_tools/directory-schemas/mri-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/mri-v2.yaml rename to src/ingest_validation_tools/directory-schemas/mri-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/mxif-v0.yaml b/src/ingest_validation_tools/directory-schemas/mxif-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/mxif-v0.yaml rename to src/ingest_validation_tools/directory-schemas/mxif-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/nano-splits-v2.yaml b/src/ingest_validation_tools/directory-schemas/nano-splits-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/nano-splits-v2.yaml rename to src/ingest_validation_tools/directory-schemas/nano-splits-v2.0.yaml 
diff --git a/src/ingest_validation_tools/directory-schemas/nano-v0.yaml b/src/ingest_validation_tools/directory-schemas/nano-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/nano-v0.yaml rename to src/ingest_validation_tools/directory-schemas/nano-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/oct-v2.yaml b/src/ingest_validation_tools/directory-schemas/oct-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/oct-v2.yaml rename to src/ingest_validation_tools/directory-schemas/oct-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/phenocycler-v2.yaml b/src/ingest_validation_tools/directory-schemas/phenocycler-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/phenocycler-v2.yaml rename to src/ingest_validation_tools/directory-schemas/phenocycler-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/publication-v0.yaml b/src/ingest_validation_tools/directory-schemas/publication-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/publication-v0.yaml rename to src/ingest_validation_tools/directory-schemas/publication-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/publication-v2.yaml b/src/ingest_validation_tools/directory-schemas/publication-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/publication-v2.yaml rename to src/ingest_validation_tools/directory-schemas/publication-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/rnaseq-geomx-v2.yaml b/src/ingest_validation_tools/directory-schemas/rnaseq-geomx-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/rnaseq-geomx-v2.yaml rename to src/ingest_validation_tools/directory-schemas/rnaseq-geomx-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/rnaseq-v2.yaml 
b/src/ingest_validation_tools/directory-schemas/rnaseq-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/rnaseq-v2.yaml rename to src/ingest_validation_tools/directory-schemas/rnaseq-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/rnaseq-with-probes-v2.yaml b/src/ingest_validation_tools/directory-schemas/rnaseq-with-probes-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/rnaseq-with-probes-v2.yaml rename to src/ingest_validation_tools/directory-schemas/rnaseq-with-probes-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/scatacseq-v0.0.yaml b/src/ingest_validation_tools/directory-schemas/scatacseq-v0.0.yaml new file mode 120000 index 000000000..c14abfab9 --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/scatacseq-v0.0.yaml @@ -0,0 +1 @@ +fastq-v0.0.yaml \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/scatacseq-v0.yaml b/src/ingest_validation_tools/directory-schemas/scatacseq-v0.yaml deleted file mode 120000 index c25e75f1e..000000000 --- a/src/ingest_validation_tools/directory-schemas/scatacseq-v0.yaml +++ /dev/null @@ -1 +0,0 @@ -fastq-v0.yaml \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/scatacseq-v2.yaml b/src/ingest_validation_tools/directory-schemas/scatacseq-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/scatacseq-v2.yaml rename to src/ingest_validation_tools/directory-schemas/scatacseq-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.0.yaml b/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.0.yaml new file mode 120000 index 000000000..63b36d340 --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.0.yaml @@ -0,0 +1 @@ +scrnaseq-v0.0.yaml \ No newline at end of file diff --git 
a/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.yaml b/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.yaml deleted file mode 120000 index 7c501ac05..000000000 --- a/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v0.yaml +++ /dev/null @@ -1 +0,0 @@ -scrnaseq-v0.yaml \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v2.yaml b/src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v2.yaml rename to src/ingest_validation_tools/directory-schemas/scrnaseq-hca-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/scrnaseq-v0.0.yaml b/src/ingest_validation_tools/directory-schemas/scrnaseq-v0.0.yaml new file mode 120000 index 000000000..c14abfab9 --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/scrnaseq-v0.0.yaml @@ -0,0 +1 @@ +fastq-v0.0.yaml \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/scrnaseq-v0.yaml b/src/ingest_validation_tools/directory-schemas/scrnaseq-v0.yaml deleted file mode 120000 index c25e75f1e..000000000 --- a/src/ingest_validation_tools/directory-schemas/scrnaseq-v0.yaml +++ /dev/null @@ -1 +0,0 @@ -fastq-v0.yaml \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/second-harmonic-generation-v2.yaml b/src/ingest_validation_tools/directory-schemas/second-harmonic-generation-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/second-harmonic-generation-v2.yaml rename to src/ingest_validation_tools/directory-schemas/second-harmonic-generation-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/segmentation-mask-v2.yaml b/src/ingest_validation_tools/directory-schemas/segmentation-mask-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/segmentation-mask-v2.yaml rename 
to src/ingest_validation_tools/directory-schemas/segmentation-mask-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/seqfish-v0.yaml b/src/ingest_validation_tools/directory-schemas/seqfish-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/seqfish-v0.yaml rename to src/ingest_validation_tools/directory-schemas/seqfish-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/sims-v2.yaml b/src/ingest_validation_tools/directory-schemas/sims-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/sims-v2.yaml rename to src/ingest_validation_tools/directory-schemas/sims-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/slideseq-v0.yaml b/src/ingest_validation_tools/directory-schemas/slideseq-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/slideseq-v0.yaml rename to src/ingest_validation_tools/directory-schemas/slideseq-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/snareseq2-v2.yaml b/src/ingest_validation_tools/directory-schemas/snareseq2-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/snareseq2-v2.yaml rename to src/ingest_validation_tools/directory-schemas/snareseq2-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/stained-v0.yaml b/src/ingest_validation_tools/directory-schemas/stained-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/stained-v0.yaml rename to src/ingest_validation_tools/directory-schemas/stained-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/stained-v1.yaml b/src/ingest_validation_tools/directory-schemas/stained-v1.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/stained-v1.yaml rename to src/ingest_validation_tools/directory-schemas/stained-v1.0.yaml diff --git 
a/src/ingest_validation_tools/directory-schemas/thick-section-multiphoton-mxif-v2.yaml b/src/ingest_validation_tools/directory-schemas/thick-section-multiphoton-mxif-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/thick-section-multiphoton-mxif-v2.yaml rename to src/ingest_validation_tools/directory-schemas/thick-section-multiphoton-mxif-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/ultrasound-v2.yaml b/src/ingest_validation_tools/directory-schemas/ultrasound-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/ultrasound-v2.yaml rename to src/ingest_validation_tools/directory-schemas/ultrasound-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/visium-no-probes-v2.yaml b/src/ingest_validation_tools/directory-schemas/visium-no-probes-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/visium-no-probes-v2.yaml rename to src/ingest_validation_tools/directory-schemas/visium-no-probes-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/visium-with-probes-v2.yaml b/src/ingest_validation_tools/directory-schemas/visium-with-probes-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/visium-with-probes-v2.yaml rename to src/ingest_validation_tools/directory-schemas/visium-with-probes-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/wgs-v0.yaml b/src/ingest_validation_tools/directory-schemas/wgs-v0.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/wgs-v0.yaml rename to src/ingest_validation_tools/directory-schemas/wgs-v0.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/wgs-v2.yaml b/src/ingest_validation_tools/directory-schemas/wgs-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/wgs-v2.yaml rename to 
src/ingest_validation_tools/directory-schemas/wgs-v2.0.yaml diff --git a/src/ingest_validation_tools/directory-schemas/xenium-v2.yaml b/src/ingest_validation_tools/directory-schemas/xenium-v2.0.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/xenium-v2.yaml rename to src/ingest_validation_tools/directory-schemas/xenium-v2.0.yaml From fbfcb25b45e347f5644a23f2442f119ebb19a330 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 25 Mar 2024 13:36:59 -0400 Subject: [PATCH 09/16] updated docs, still need to update readmes --- docs/10x-multiome/current/index.md | 2 +- docs/af/current/index.md | 34 ++++++++++++++++++ docs/af/deprecated/index.md | 35 +------------------ docs/atacseq/current/index.md | 2 +- docs/bodyct/current/index.md | 8 +++++ docs/bodyct/deprecated/index.md | 8 ----- docs/bulkatacseq/deprecated/index.md | 2 +- docs/bulkrnaseq/deprecated/index.md | 2 +- docs/celldive/current/index.md | 2 +- docs/celldive/deprecated/index.md | 2 +- docs/cems/deprecated/index.md | 2 +- docs/codex/current/index.md | 2 +- docs/codex/deprecated/index.md | 4 +-- docs/confocal/current/index.md | 2 +- docs/cosmx/current/index.md | 2 +- docs/cycif/current/index.md | 2 +- docs/dbit/current/index.md | 2 +- docs/desi/current/index.md | 2 +- docs/enhanced-srs/current/index.md | 2 +- docs/gcms/deprecated/index.md | 2 +- docs/geomx-ncounter/current/index.md | 2 +- docs/geomx-ngs/current/index.md | 2 +- docs/hifi-slide/current/index.md | 2 +- docs/histology/current/index.md | 2 +- docs/imc-2d/current/index.md | 2 +- docs/imc/deprecated/index.md | 2 +- docs/imc3d/deprecated/index.md | 2 +- docs/ims/deprecated/index.md | 2 +- docs/lcms/current/index.md | 2 +- docs/lcms/deprecated/index.md | 2 +- docs/lightsheet/current/index.md | 2 +- docs/lightsheet/deprecated/index.md | 4 +-- docs/maldi/current/index.md | 2 +- docs/mc/current/index.md | 2 +- docs/merfish/current/index.md | 2 +- docs/mibi/current/index.md | 2 +- docs/mibi/deprecated/index.md | 2 +- 
docs/microct/current/index.md | 2 +- docs/mri/current/index.md | 2 +- docs/mxif/deprecated/index.md | 2 +- docs/nano-splits/current/index.md | 2 +- docs/nano/deprecated/index.md | 2 +- docs/oct/current/index.md | 2 +- docs/phenocycler/current/index.md | 2 +- docs/publication/current/index.md | 2 +- docs/publication/deprecated/index.md | 2 +- docs/rnaseq-with-probes/current/index.md | 2 +- docs/rnaseq/current/index.md | 2 +- docs/scatacseq/deprecated/index.md | 2 +- docs/scrnaseq-hca/deprecated/index.md | 2 +- docs/scrnaseq/deprecated/index.md | 2 +- .../current/index.md | 2 +- docs/segmentation-mask/current/index.md | 2 +- docs/seqfish/deprecated/index.md | 2 +- docs/sims/current/index.md | 2 +- docs/slideseq/deprecated/index.md | 2 +- docs/snareseq2/current/index.md | 2 +- docs/stained/deprecated/index.md | 4 +-- .../current/index.md | 2 +- docs/ultrasound/current/index.md | 2 +- docs/visium-no-probes/current/index.md | 2 +- docs/visium-with-probes/current/index.md | 2 +- docs/wgs/deprecated/index.md | 2 +- docs/xenium/current/index.md | 2 +- src/generate_docs.py | 5 ++- ...with-dataset-json.yaml => codex-v1.1.yaml} | 0 src/ingest_validation_tools/docs_utils.py | 4 +-- tests/test-generate-docs.sh | 4 +-- 68 files changed, 113 insertions(+), 111 deletions(-) rename src/ingest_validation_tools/directory-schemas/{codex-v1-with-dataset-json.yaml => codex-v1.1.yaml} (100%) diff --git a/docs/10x-multiome/current/index.md b/docs/10x-multiome/current/index.md index c3deb15ea..16b392e48 100644 --- a/docs/10x-multiome/current/index.md +++ b/docs/10x-multiome/current/index.md @@ -28,7 +28,7 @@ REQUIRED - For this assay, you must also prepare and submit two additional metad
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/af/current/index.md b/docs/af/current/index.md index acea5a873..f21aa1153 100644 --- a/docs/af/current/index.md +++ b/docs/af/current/index.md @@ -27,3 +27,37 @@ This schema is for autofluorescence (AF). For an example of an AF dataset & dire
+## Directory schemas +Version 2.0 (use this one) + +| pattern | required? | description | dependent on | +| --- | --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | +| raw\/.* | ✓ | Raw data files for the experiment. | | +| raw\/channel_layout\.tsv | ✓ | Table that includes a dictionary for channel to moiety, which may be a protein given in an OMAP panel or captured in the ASCT+B table. | | +| raw\/images\/.* | ✓ | Raw image files. Using this subdirectory allows for harmonization with other imaging assays. [This directory must include at least one raw file.] | | +| raw\/images\/[^\/]+\.(?:xml|nd2|oir|lif|czi|tiff) | ✓ | Raw microscope file for the experiment | | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | | +| lab_processed\/images\/.* | ✓ | Processed image files | | +| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. See the following link for the set of fields that are required in the OME TIFF file XML header. | | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel.
The required fields are detailed | | +| lab_processed\/transformations\/.* | | This directory contains transformation matrices that capture how each modality is aligned with the other and can be used to visualize overlays of multimodal data. This is needed to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). In these cases, the data types may have different pixel sizes and slightly different orientations (i.e., one may be rotated relative to another). | | +| lab_processed\/transformations\/[^\/]+\.txt | | Transformation matrices used to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). | | +| qa_qc\/.* | ✓ | Directory containing QA and/or QC information. | | +| qa_qc\/resolution_report\/.* | ✓ | Directory containing the results of resolution tests and/or vendor preventative maintenance reports. | | +| qa_qc\/resolution_report\/resolution\.txt | | This file summarizes the results of resolution tests or vendor reports from preventative maintenance visits. | | +| qa_qc\/resolution_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or resolution check tool demonstrating resolution. This file may include illumination test results. | | +| qa_qc\/illumination_report\/.* | ✓ | Directory containing the results of illumination tests and/or vendor preventative maintenance reports. | | +| qa_qc\/illumination_report\/illumination.txt | | This file summarizes the results of illumination tests or vendor reports from preventative maintenance visits. | | +| qa_qc\/illumination_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or illumination check tool demonstrating illumination intensity. | | +| lab_processed\/annotations\/.* | | Directory containing segmentation masks.
| | +| lab_processed\/annotations\/[^\/]+\.segmentations\.ome\.tiff | | The segmentation masks should be stored as multi-channel pyramidal OME TIFF bitmasks with one channel per mask, where a single mask contains all instances of a type of object (e.g., all cells, a class of FTUs, etc). The class of objects contained in the mask is documented in the segmentation-masks.csv file. Each individual object in a mask should be represented by a unique integer pixel value starting at 1, with 0 meaning background (e.g., all pixels belonging to the first instance of a T-cell have a value of 1, the pixels for the second instance of a T-cell have a value of 2, etc). The pixel values should be unique within a mask. FTUs and other structural elements should be captured the same way as cells with segmentation masks and the appropriate channel feature definitions. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/segmentation-masks\.csv | | This file contains details about each mask, with one row per mask. Each column in this file contains details describing the mask (e.g., channel number, mask name, ontological ID, etc). Each mask is stored as a channel in the segmentations.ome.tiff file and the mask name should be ontologically based and linked to the ASCT+B table where possible. The number of rows in this file should equal the number of channels in the segmentations.ome.tiff. For example, one row in this file would ontologically describe cells, if the segmentations.ome.tiff file contained a mask of all cells. A minimum set of fields (required and optional) is included below. If multiple segmentations.ome.tiff files are used, this segmentation-masks.csv file should document the masks across all of the OME TIFF files. 
| lab_processed\/annotations\/.* | +| lab_processed\/annotations\/[^\/]+-objects\.csv | | This is a matrix where each row describes an individual object (e.g., one row per cell in the case where a mask contains all cells) and columns are features (i.e., object type, marker intensity, classification strategies, etc). One file should be created per mask with the name of the mask prepended to the file name. For example, if there’s a cell segmentation map called “cells” then you would include a file called “cells-objects.csv” and that file would contain one row per cell in the “cells” mask and one column per feature, such as marker intensity and/or cell type. A minimum set of fields (required and optional) is included below. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/[^\/]+\.geojson | | A GeoJSON file(s) containing the geometries of each object within a mask. For example, if the mask contains multiple FTUs, multiple cells, etc, each of the objects in the mask would be independently documented in the GeoJSON file. There would be a single GeoJSON file per mask and the name of the file should be the name of the mask. If this file is generated by QuPath, the coordinates will be in pixel units with the origin (0, 0) as the top left corner of the full-resolution image. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/tissue-boundary\.geojson | | **[QA/QC]** If the boundaries of the tissue have been identified (e.g., by manual efforts), then the boundary geometry can be included as a GeoJSON file named “tissue-boundary.geojson”. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/regions-of-concern\.csv | | This file and the associated GeoJSON file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. 
This file should contain one row per region and include documentation about the region and why it's being flagged. | lab_processed\/annotations\/.* | +| lab_processed\/annotations\/regions-of-concern\.geojson | | This file and the associated CSV file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. This file should contain the geometric coordinates of each region being flagged. | lab_processed\/annotations\/.* | + diff --git a/docs/af/deprecated/index.md b/docs/af/deprecated/index.md index 644c24212..f64c0410e 100644 --- a/docs/af/deprecated/index.md +++ b/docs/af/deprecated/index.md @@ -527,40 +527,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 2.0 (use this one) - -| pattern | required? | description | dependent on | -| --- | --- | --- | --- | -| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | | -| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | -| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | | -| raw\/.* | ✓ | Raw data files for the experiment. | | -| raw\/channel_layout\.tsv | ✓ | Table that includes a dictionary for channel to moiety, which may be a protein given in an OMAP panel or captured in the ASCT+B table. | | -| raw\/images\/.* | ✓ | Raw image files. Using this subdirectory allows for harmonization with other imaging assays. [This directory must include at least one raw file.] | | -| raw\/images\/[^\/]+\.(?:xml|nd2|oir|lif|czi|tiff) | ✓ | Raw microscope file for the experiment | | -| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | | -| lab_processed\/images\/.* | ✓ | Processed image files | | -| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. See the following link for the set of fields that are required in the OME TIFF file XML header. | | -| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accommpanying OME TIFF. The file should contain one row per OME TIFF channel. 
The required fields are detailed | | -| lab_processed\/transformations\/.* | | This directory contains transformation matrices that capture how each modality is aligned with the other and can be used to visualize overlays of multimodal data. This is needed to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). In these cases data type may have different pixel sizes and slightly different orientations (i.e., one may be rotated relative to another). | | -| lab_processed\/transformations\/[^\/]+\.txt | | Transformation matrices used to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). | | -| qa_qc\/.* | ✓ | Directory containing QA and/or QC information. | | -| qa_qc\/resolution_report\/.* | ✓ | Directory containing the results of resolution tests and/or vendor preventative maintenance reports. | | -| qa_qc\/resolution_report\/resolution\.txt | | This file summarizes the results of resolution tests or vendor reports from preventative maintenance visits. | | -| qa_qc\/resolution_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or resolution check tool demonstrating resolution. This file may include illumination test results. | | -| qa_qc\/illumination_report\/.* | ✓ | Directory containing the results of illumination tests and/or vendor preventative maintenance reports. | | -| qa_qc\/illumination_report\/illumination.txt | | This file summarizes the results of illumination tests or vendor reports from preventative maintenance visits. | | -| qa_qc\/illumination_report\/[^\/]+\.pdf | | This file is a pdf from a vendor preventative maintenance visit or illumination check tool demonstrating illumination intensity. | | -| lab_processed\/annotations\/.* | | Directory containing segmentation masks. 
| | -| lab_processed\/annotations\/[^\/]+\.segmentations\.ome\.tiff | | The segmentation masks should be stored as multi-channel pyramidal OME TIFF bitmasks with one channel per mask, where a single mask contains all instances of a type of object (e.g., all cells, a class of FTUs, etc). The class of objects contained in the mask is documented in the segmentation-masks.csv file. Each individual object in a mask should be represented by a unique integer pixel value starting at 1, with 0 meaning background (e.g., all pixels belonging to the first instance of a T-cell have a value of 1, the pixels for the second instance of a T-cell have a value of 2, etc). The pixel values should be unique within a mask. FTUs and other structural elements should be captured the same way as cells with segmentation masks and the appropriate channel feature definitions. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/segmentation-masks\.csv | | This file contains details about each mask, with one row per mask. Each column in this file contains details describing the mask (e.g., channel number, mask name, ontological ID, etc). Each mask is stored as a channel in the segmentations.ome.tiff file and the mask name should be ontologically based and linked to the ASCT+B table where possible. The number of rows in this file should equal the number of channels in the segmentations.ome.tiff. For example, one row in this file would ontologically describe cells, if the segmentations.ome.tiff file contained a mask of all cells. A minimum set of fields (required and optional) is included below. If multiple segmentations.ome.tiff files are used, this segmentation-masks.csv file should document the masks across all of the OME TIFF files. 
| lab_processed\/annotations\/.* | -| lab_processed\/annotations\/[^\/]+-objects\.csv | | This is a matrix where each row describes an individual object (e.g., one row per cell in the case where a mask contains all cells) and columns are features (i.e., object type, marker intensity, classification strategies, etc). One file should be created per mask with the name of the mask prepended to the file name. For example, if there’s a cell segmentation map called “cells” then you would include a file called “cells-objects.csv” and that file would contain one row per cell in the “cells” mask and one column per feature, such as marker intensity and/or cell type. A minimum set of fields (required and optional) is included below. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/[^\/]+\.geojson | | A GeoJSON file(s) containing the geometries of each object within a mask. For example, if the mask contains multiple FTUs, multiple cells, etc, each of the objects in the mask would be independently documented in the GeoJSON file. There would be a single GeoJSON file per mask and the name of the file should be the name of the mask. If this file is generated by QuPath, the coordinates will be in pixel units with the origin (0, 0) as the top left corner of the full-resolution image. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/tissue-boundary\.geojson | | **[QA/QC]** If the boundaries of the tissue have been identified (e.g., by manual efforts), then the boundary geometry can be included as a GeoJSON file named “tissue-boundary.geojson”. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/regions-of-concern\.csv | | This file and the associated GeoJSON file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. 
This file should contain one row per region and include documentation about the region and why it's being flagged. | lab_processed\/annotations\/.* | -| lab_processed\/annotations\/regions-of-concern\.geojson | | This file and the associated CSV file can be used to denote any regions in the image that may contain QA/QC concerns. For example, if there are folds in the tissue, the region of the fold can be highlighted. This file should contain the geometric coordinates of each region being flagged. | lab_processed\/annotations\/.* | - -Version 0.0 +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/atacseq/current/index.md b/docs/atacseq/current/index.md index 81d4f39b2..26e44150f 100644 --- a/docs/atacseq/current/index.md +++ b/docs/atacseq/current/index.md @@ -30,7 +30,7 @@ For additional documentation on this dataset type, please visit [here](https://d
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/bodyct/current/index.md b/docs/bodyct/current/index.md index a0c680fcd..1adace721 100644 --- a/docs/bodyct/current/index.md +++ b/docs/bodyct/current/index.md @@ -23,3 +23,11 @@ Excel and TSV templates for this schema will be available when the draft next-ge
+## Directory schemas +Version 2.0 (use this one) + +| pattern | required? | description | +| --- | --- | --- | +| TODO | ✓ | Directory structure not yet specified. | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | + diff --git a/docs/bodyct/deprecated/index.md b/docs/bodyct/deprecated/index.md index 75b632416..d7382ec55 100644 --- a/docs/bodyct/deprecated/index.md +++ b/docs/bodyct/deprecated/index.md @@ -370,11 +370,3 @@ Relative path to file or directory with instrument data. Downstream processing w
-## Directory schemas -Version 2.0 (use this one) - -| pattern | required? | description | -| --- | --- | --- | -| TODO | ✓ | Directory structure not yet specified. | -| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | - diff --git a/docs/bulkatacseq/deprecated/index.md b/docs/bulkatacseq/deprecated/index.md index 8218bae53..9570746ec 100644 --- a/docs/bulkatacseq/deprecated/index.md +++ b/docs/bulkatacseq/deprecated/index.md @@ -806,7 +806,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/bulkrnaseq/deprecated/index.md b/docs/bulkrnaseq/deprecated/index.md index 40b4dab36..ff26834cc 100644 --- a/docs/bulkrnaseq/deprecated/index.md +++ b/docs/bulkrnaseq/deprecated/index.md @@ -727,7 +727,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/celldive/current/index.md b/docs/celldive/current/index.md index 4a8037297..d83258c0a 100644 --- a/docs/celldive/current/index.md +++ b/docs/celldive/current/index.md @@ -28,5 +28,5 @@ Related files:
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/celldive/deprecated/index.md b/docs/celldive/deprecated/index.md index 09ff4111e..6e8cd5d2e 100644 --- a/docs/celldive/deprecated/index.md +++ b/docs/celldive/deprecated/index.md @@ -585,7 +585,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/cems/deprecated/index.md b/docs/cems/deprecated/index.md index 4c3f1e0c8..1b9d7bc7f 100644 --- a/docs/cems/deprecated/index.md +++ b/docs/cems/deprecated/index.md @@ -488,7 +488,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/codex/current/index.md b/docs/codex/current/index.md index 8b35e2c53..572220d31 100644 --- a/docs/codex/current/index.md +++ b/docs/codex/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/codex/deprecated/index.md b/docs/codex/deprecated/index.md index 24b9c20e8..8de720d4d 100644 --- a/docs/codex/deprecated/index.md +++ b/docs/codex/deprecated/index.md @@ -650,7 +650,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 1-with-dataset-json (use this one) +Version 1.1 (use this one) | pattern | required? | description | | --- | --- | --- | @@ -671,7 +671,7 @@ Relative path to file or directory with instrument data. Downstream processing w | extras/dir-schema-v1-with-dataset-json | ✓ | Empty file whose presence indicates the version of the directory schema in use | | extras\/.* | | Folder for general lab-specific files related to the dataset. [Exists in all assays] | -Version 0 +Version 0.0 | pattern | required? | description | | --- | --- | --- | diff --git a/docs/confocal/current/index.md b/docs/confocal/current/index.md index f6713e30f..4e250799d 100644 --- a/docs/confocal/current/index.md +++ b/docs/confocal/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/cosmx/current/index.md b/docs/cosmx/current/index.md index 44f129334..94b1c0bc8 100644 --- a/docs/cosmx/current/index.md +++ b/docs/cosmx/current/index.md @@ -24,5 +24,5 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/cycif/current/index.md b/docs/cycif/current/index.md index 13afe9a33..a35d07e3a 100644 --- a/docs/cycif/current/index.md +++ b/docs/cycif/current/index.md @@ -28,5 +28,5 @@ Related files:
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/dbit/current/index.md b/docs/dbit/current/index.md index ac5f5b547..9d1226f05 100644 --- a/docs/dbit/current/index.md +++ b/docs/dbit/current/index.md @@ -24,5 +24,5 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/desi/current/index.md b/docs/desi/current/index.md index 8cd478fd3..7d7224334 100644 --- a/docs/desi/current/index.md +++ b/docs/desi/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/enhanced-srs/current/index.md b/docs/enhanced-srs/current/index.md index 4931f5c4e..b947f25e8 100644 --- a/docs/enhanced-srs/current/index.md +++ b/docs/enhanced-srs/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/gcms/deprecated/index.md b/docs/gcms/deprecated/index.md index d7c043edf..ac5d6b8e4 100644 --- a/docs/gcms/deprecated/index.md +++ b/docs/gcms/deprecated/index.md @@ -518,7 +518,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/geomx-ncounter/current/index.md b/docs/geomx-ncounter/current/index.md index b82ec47ed..b765388ad 100644 --- a/docs/geomx-ncounter/current/index.md +++ b/docs/geomx-ncounter/current/index.md @@ -28,5 +28,5 @@ Related files:
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/geomx-ngs/current/index.md b/docs/geomx-ngs/current/index.md index 8df74e45d..7756542df 100644 --- a/docs/geomx-ngs/current/index.md +++ b/docs/geomx-ngs/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/hifi-slide/current/index.md b/docs/hifi-slide/current/index.md index 2c96fb3a4..0815c6598 100644 --- a/docs/hifi-slide/current/index.md +++ b/docs/hifi-slide/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/histology/current/index.md b/docs/histology/current/index.md index 00c48f36e..fdc9d6756 100644 --- a/docs/histology/current/index.md +++ b/docs/histology/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/imc-2d/current/index.md b/docs/imc-2d/current/index.md index 80f208c8b..b303e36eb 100644 --- a/docs/imc-2d/current/index.md +++ b/docs/imc-2d/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/imc/deprecated/index.md b/docs/imc/deprecated/index.md index 9bbc4e7e9..86b846114 100644 --- a/docs/imc/deprecated/index.md +++ b/docs/imc/deprecated/index.md @@ -819,7 +819,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/imc3d/deprecated/index.md b/docs/imc3d/deprecated/index.md index 05fcb58c2..07ea89ec2 100644 --- a/docs/imc3d/deprecated/index.md +++ b/docs/imc3d/deprecated/index.md @@ -788,7 +788,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/ims/deprecated/index.md b/docs/ims/deprecated/index.md index 9f85c36cb..f534006e1 100644 --- a/docs/ims/deprecated/index.md +++ b/docs/ims/deprecated/index.md @@ -1010,7 +1010,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/lcms/current/index.md b/docs/lcms/current/index.md index 491ed17a1..a72cb1d52 100644 --- a/docs/lcms/current/index.md +++ b/docs/lcms/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/lcms/deprecated/index.md b/docs/lcms/deprecated/index.md index 2b384a612..e6e549a0c 100644 --- a/docs/lcms/deprecated/index.md +++ b/docs/lcms/deprecated/index.md @@ -1899,7 +1899,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/lightsheet/current/index.md b/docs/lightsheet/current/index.md index f73e1cb10..8bd8c9f21 100644 --- a/docs/lightsheet/current/index.md +++ b/docs/lightsheet/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/lightsheet/deprecated/index.md b/docs/lightsheet/deprecated/index.md index 4093e6494..545d574ab 100644 --- a/docs/lightsheet/deprecated/index.md +++ b/docs/lightsheet/deprecated/index.md @@ -828,7 +828,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 1 (use this one) +Version 1.0 (use this one) | pattern | required? | description | | --- | --- | --- | @@ -852,7 +852,7 @@ Relative path to file or directory with instrument data. Downstream processing w | Level3/Merged/MergedChannel[^/]+/[^/]+\.ome.tiff | | Merged image file. | | extras\/.* | | Folder for general lab-specific files related to the dataset. [Exists in all assays] | -Version 0 +Version 0.0 | pattern | required? | description | | --- | --- | --- | diff --git a/docs/maldi/current/index.md b/docs/maldi/current/index.md index c1d23f1e9..8bd2cf839 100644 --- a/docs/maldi/current/index.md +++ b/docs/maldi/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/mc/current/index.md b/docs/mc/current/index.md index 85684127e..9606b89b3 100644 --- a/docs/mc/current/index.md +++ b/docs/mc/current/index.md @@ -24,5 +24,5 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/merfish/current/index.md b/docs/merfish/current/index.md index 03dc359dc..160396327 100644 --- a/docs/merfish/current/index.md +++ b/docs/merfish/current/index.md @@ -24,7 +24,7 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/mibi/current/index.md b/docs/mibi/current/index.md index c501fd76b..5da6052ad 100644 --- a/docs/mibi/current/index.md +++ b/docs/mibi/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/mibi/deprecated/index.md b/docs/mibi/deprecated/index.md index 195eed29a..e374570ab 100644 --- a/docs/mibi/deprecated/index.md +++ b/docs/mibi/deprecated/index.md @@ -581,7 +581,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/microct/current/index.md b/docs/microct/current/index.md index 9bcbe98a9..6006318d2 100644 --- a/docs/microct/current/index.md +++ b/docs/microct/current/index.md @@ -24,7 +24,7 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/mri/current/index.md b/docs/mri/current/index.md index 61efa38d0..efe0d13bf 100644 --- a/docs/mri/current/index.md +++ b/docs/mri/current/index.md @@ -24,7 +24,7 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/mxif/deprecated/index.md b/docs/mxif/deprecated/index.md index af94d5ead..45546ecb4 100644 --- a/docs/mxif/deprecated/index.md +++ b/docs/mxif/deprecated/index.md @@ -567,7 +567,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/nano-splits/current/index.md b/docs/nano-splits/current/index.md index d13d24e20..54b0a08b5 100644 --- a/docs/nano-splits/current/index.md +++ b/docs/nano-splits/current/index.md @@ -28,5 +28,5 @@ Related files:
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/nano/deprecated/index.md b/docs/nano/deprecated/index.md index 2f6eb045f..9708782d0 100644 --- a/docs/nano/deprecated/index.md +++ b/docs/nano/deprecated/index.md @@ -564,7 +564,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one)
Deprecated | pattern | required? | description | diff --git a/docs/oct/current/index.md b/docs/oct/current/index.md index b43af7b28..dac51265e 100644 --- a/docs/oct/current/index.md +++ b/docs/oct/current/index.md @@ -24,7 +24,7 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/phenocycler/current/index.md b/docs/phenocycler/current/index.md index 712f07576..9dd7ecb45 100644 --- a/docs/phenocycler/current/index.md +++ b/docs/phenocycler/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/publication/current/index.md b/docs/publication/current/index.md index 7d398c92a..ab7e0f5ce 100644 --- a/docs/publication/current/index.md +++ b/docs/publication/current/index.md @@ -24,7 +24,7 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/publication/deprecated/index.md b/docs/publication/deprecated/index.md index 6572e563b..50b59ffdf 100644 --- a/docs/publication/deprecated/index.md +++ b/docs/publication/deprecated/index.md @@ -72,7 +72,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/rnaseq-with-probes/current/index.md b/docs/rnaseq-with-probes/current/index.md index 4eb58822c..859fcbb63 100644 --- a/docs/rnaseq-with-probes/current/index.md +++ b/docs/rnaseq-with-probes/current/index.md @@ -28,7 +28,7 @@ For additional documentation on this dataset type, please visit [here](https://d
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/rnaseq/current/index.md b/docs/rnaseq/current/index.md index ef9e54cc0..bba6f91d0 100644 --- a/docs/rnaseq/current/index.md +++ b/docs/rnaseq/current/index.md @@ -30,7 +30,7 @@ For additional documentation on this dataset type, please visit [here](https://d
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/scatacseq/deprecated/index.md b/docs/scatacseq/deprecated/index.md index 737dcc3b6..deb4fb5b4 100644 --- a/docs/scatacseq/deprecated/index.md +++ b/docs/scatacseq/deprecated/index.md @@ -886,7 +886,7 @@ Relative path to file or directory with instrument data. Downstream processing w ## Directory schemas The HIVE will process each dataset with [scATACseq Pipeline 1.4.3](https://github.com/hubmapconsortium/sc-atac-seq-pipeline/releases/tag/1.4.3). -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/scrnaseq-hca/deprecated/index.md b/docs/scrnaseq-hca/deprecated/index.md index 14685e8c8..9fc7a72b4 100644 --- a/docs/scrnaseq-hca/deprecated/index.md +++ b/docs/scrnaseq-hca/deprecated/index.md @@ -454,7 +454,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/scrnaseq/deprecated/index.md b/docs/scrnaseq/deprecated/index.md index ee934f658..41cef8f39 100644 --- a/docs/scrnaseq/deprecated/index.md +++ b/docs/scrnaseq/deprecated/index.md @@ -1678,7 +1678,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/second-harmonic-generation/current/index.md b/docs/second-harmonic-generation/current/index.md index ab76d620f..85be05c60 100644 --- a/docs/second-harmonic-generation/current/index.md +++ b/docs/second-harmonic-generation/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/segmentation-mask/current/index.md b/docs/segmentation-mask/current/index.md index e30d3b5b9..5f75354f8 100644 --- a/docs/segmentation-mask/current/index.md +++ b/docs/segmentation-mask/current/index.md @@ -24,5 +24,5 @@ For additional documentation on Segmentation Masks, please visit [here](https://
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/docs/seqfish/deprecated/index.md b/docs/seqfish/deprecated/index.md index aa9ffee1f..e7d9ba3d3 100644 --- a/docs/seqfish/deprecated/index.md +++ b/docs/seqfish/deprecated/index.md @@ -679,7 +679,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/sims/current/index.md b/docs/sims/current/index.md index fb3cbefeb..7e2a65173 100644 --- a/docs/sims/current/index.md +++ b/docs/sims/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/slideseq/deprecated/index.md b/docs/slideseq/deprecated/index.md index 5b6240b91..09ebbea86 100644 --- a/docs/slideseq/deprecated/index.md +++ b/docs/slideseq/deprecated/index.md @@ -709,7 +709,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/snareseq2/current/index.md b/docs/snareseq2/current/index.md index 7544daa70..35e82ec8d 100644 --- a/docs/snareseq2/current/index.md +++ b/docs/snareseq2/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/stained/deprecated/index.md b/docs/stained/deprecated/index.md index ac1edf01e..ae5e2e98c 100644 --- a/docs/stained/deprecated/index.md +++ b/docs/stained/deprecated/index.md @@ -546,7 +546,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 1 (use this one) +Version 1.0 (use this one) | pattern | required? | description | | --- | --- | --- | @@ -556,7 +556,7 @@ Relative path to file or directory with instrument data. Downstream processing w | (rawMicroscopy|raw_microscopy)/[^/]+\.(scn|czi|tif|tiff) | ✓ | Raw microscope file for the experiment | | extras\/.* | | Folder for general lab-specific files related to the dataset. [Exists in all assays] | -Version 0 +Version 0.0 | pattern | required? | description | | --- | --- | --- | diff --git a/docs/thick-section-multiphoton-mxif/current/index.md b/docs/thick-section-multiphoton-mxif/current/index.md index 169791180..abf72ca2b 100644 --- a/docs/thick-section-multiphoton-mxif/current/index.md +++ b/docs/thick-section-multiphoton-mxif/current/index.md @@ -28,7 +28,7 @@ Related files:
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/ultrasound/current/index.md b/docs/ultrasound/current/index.md index 1e3914fef..d8187950f 100644 --- a/docs/ultrasound/current/index.md +++ b/docs/ultrasound/current/index.md @@ -24,7 +24,7 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/visium-no-probes/current/index.md b/docs/visium-no-probes/current/index.md index c5934aa52..1adfd8b73 100644 --- a/docs/visium-no-probes/current/index.md +++ b/docs/visium-no-probes/current/index.md @@ -30,7 +30,7 @@ REQUIRED - For this assay, you must also prepare and submit two additional metad
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/visium-with-probes/current/index.md b/docs/visium-with-probes/current/index.md index 5a468b05c..88461e8b0 100644 --- a/docs/visium-with-probes/current/index.md +++ b/docs/visium-with-probes/current/index.md @@ -28,7 +28,7 @@ REQUIRED - For this assay, you must also prepare and submit two additional metad
## Directory schemas -Version 2 (use this one) +Version 2.0 (use this one) | pattern | required? | description | dependent on | | --- | --- | --- | --- | diff --git a/docs/wgs/deprecated/index.md b/docs/wgs/deprecated/index.md index f5d6b80ad..7f1f6f106 100644 --- a/docs/wgs/deprecated/index.md +++ b/docs/wgs/deprecated/index.md @@ -645,7 +645,7 @@ Relative path to file or directory with instrument data. Downstream processing w
## Directory schemas -Version 0 (use this one) +Version 0.0 (use this one) | pattern | required? | description | | --- | --- | --- | diff --git a/docs/xenium/current/index.md b/docs/xenium/current/index.md index 1c00ae458..998c4b412 100644 --- a/docs/xenium/current/index.md +++ b/docs/xenium/current/index.md @@ -24,5 +24,5 @@ Excel and TSV templates for this schema will be available when the draft next-ge
## Directory schemas -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) diff --git a/src/generate_docs.py b/src/generate_docs.py index 86e1f9c57..5c88482ec 100755 --- a/src/generate_docs.py +++ b/src/generate_docs.py @@ -77,7 +77,10 @@ def main(): current["metadata"][v] = schema for v, schema in directory_schemas.items(): - if not v.isdigit() or int(v) < 2: + try: + a = float(v) + assert a >= 2.0 + except AssertionError: deprecated["directories"][v] = schema else: current["directories"][v] = schema diff --git a/src/ingest_validation_tools/directory-schemas/codex-v1-with-dataset-json.yaml b/src/ingest_validation_tools/directory-schemas/codex-v1.1.yaml similarity index 100% rename from src/ingest_validation_tools/directory-schemas/codex-v1-with-dataset-json.yaml rename to src/ingest_validation_tools/directory-schemas/codex-v1.1.yaml diff --git a/src/ingest_validation_tools/docs_utils.py b/src/ingest_validation_tools/docs_utils.py index 4eb8bfb88..b05ad8ea1 100644 --- a/src/ingest_validation_tools/docs_utils.py +++ b/src/ingest_validation_tools/docs_utils.py @@ -118,9 +118,7 @@ def _get_portal_names_md(assay_types): return f'In the portal: {" / ".join(links)}' -def generate_readme_md( - table_schemas, pipeline_infos, directory_schemas, schema_name, is_assay=True -): +def generate_readme_md(table_schemas, pipeline_infos, directory_schemas, schema_name, is_assay=True): int_keys = [int(k) for k in table_schemas.keys()] max_version = max(int_keys) min_version = min(int_keys) diff --git a/tests/test-generate-docs.sh b/tests/test-generate-docs.sh index 01b7152ae..40fe69ee9 100755 --- a/tests/test-generate-docs.sh +++ b/tests/test-generate-docs.sh @@ -67,13 +67,13 @@ for TYPE in $(ls -d docs/*); do if [ -e $REAL_DEST/current ] && [ -e $TEST_DEST/current ]; then diff -r $REAL_DEST/current 
$TEST_DEST/current --exclude="*.tsv" --exclude="*.xlsx" \ || die "Update needed: $REAL_CMD - Or:" 'for D in `ls -d docs/*/`; do D=`basename $D`; [ -e docs/$D/*.tsv ] || continue; src/generate_docs.py $D docs/$D; done' + Or:" 'for D in `ls -d docs/*/`; do D=`basename $D`; src/generate_docs.py $D docs/$D; done' fi if [ -e $REAL_DEST/deprecated ] && [ -e $TEST_DEST/deprecated ]; then diff -r $REAL_DEST/deprecated $TEST_DEST/deprecated --exclude="*.tsv" --exclude="*.xlsx" \ || die "Update needed: $REAL_CMD - Or:" 'for D in `ls -d docs/*/`; do D=`basename $D`; [ -e docs/$D/*.tsv ] || continue; src/generate_docs.py $D docs/$D; done' + Or:" 'for D in `ls -d docs/*/`; do D=`basename $D`; src/generate_docs.py $D docs/$D; done' fi rm -rf $TEST_DEST From 89862cc27110688da3b8b733a2a474aa4a8e1757 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 25 Mar 2024 13:54:12 -0400 Subject: [PATCH 10/16] updating readmes, all codex broken because assayclassifier still returns codex-v1-with-dataset-json --- examples/dataset-examples/good-cedar-histology/README.md | 2 +- .../good-cedar-multi-assay-visium/README.md | 6 +++--- examples/dataset-examples/good-maldiims/README.md | 2 +- .../dataset-examples/good-scatacseq-metadata-v0/README.md | 2 +- .../dataset-examples/good-scatacseq-metadata-v1/README.md | 2 +- examples/dataset-iec-examples/good-example/README.md | 2 +- src/ingest_validation_tools/upload.py | 3 +++ 7 files changed, 11 insertions(+), 8 deletions(-) diff --git a/examples/dataset-examples/good-cedar-histology/README.md b/examples/dataset-examples/good-cedar-histology/README.md index 12aa61bfc..91a693c18 100644 --- a/examples/dataset-examples/good-cedar-histology/README.md +++ b/examples/dataset-examples/good-cedar-histology/README.md @@ -7,6 +7,6 @@ TSVs: validated-histology-metadata.tsv: Schema: h-and-e-v2 Metadata schema version: '2' - Directory schema version: histology-v2 + Directory schema version: histology-v2.0 ``` \ No newline at end of file diff --git 
a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md index 8eb325868..f7aea6138 100644 --- a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md +++ b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md @@ -7,14 +7,14 @@ TSVs: good-visium-assay-metadata.tsv: Schema: visium-no-probes-v2 Metadata schema version: '2' - Directory schema version: visium-no-probes-v2 + Directory schema version: visium-no-probes-v2.0 good-visium-histology-metadata.tsv: Schema: h-and-e-v2 Metadata schema version: '2' - Directory schema version: histology-v2 + Directory schema version: visium-no-probes-v2.0 good-visium-rnaseq-metadata.tsv: Schema: scRNAseq-10xGenomics-v3-v2 Metadata schema version: '2' - Directory schema version: rnaseq-v2 + Directory schema version: visium-no-probes-v2.0 ``` \ No newline at end of file diff --git a/examples/dataset-examples/good-maldiims/README.md b/examples/dataset-examples/good-maldiims/README.md index 15dd152bb..a738bb250 100644 --- a/examples/dataset-examples/good-maldiims/README.md +++ b/examples/dataset-examples/good-maldiims/README.md @@ -7,6 +7,6 @@ TSVs: metadata.tsv: Schema: ims-v0 Metadata schema version: ims-v0 - Directory schema version: ims-v0 + Directory schema version: ims-v0.0 ``` \ No newline at end of file diff --git a/examples/dataset-examples/good-scatacseq-metadata-v0/README.md b/examples/dataset-examples/good-scatacseq-metadata-v0/README.md index 52abc4ede..0a3a1e0cc 100644 --- a/examples/dataset-examples/good-scatacseq-metadata-v0/README.md +++ b/examples/dataset-examples/good-scatacseq-metadata-v0/README.md @@ -7,6 +7,6 @@ TSVs: metadata.tsv: Schema: scatacseq-v0 Metadata schema version: scatacseq-v0 - Directory schema version: scatacseq-v0 + Directory schema version: scatacseq-v0.0 ``` \ No newline at end of file diff --git a/examples/dataset-examples/good-scatacseq-metadata-v1/README.md 
b/examples/dataset-examples/good-scatacseq-metadata-v1/README.md index 37ab71405..20c1b4f1d 100644 --- a/examples/dataset-examples/good-scatacseq-metadata-v1/README.md +++ b/examples/dataset-examples/good-scatacseq-metadata-v1/README.md @@ -7,6 +7,6 @@ TSVs: metadata.tsv: Schema: scatacseq-v1 Metadata schema version: scatacseq-v1 - Directory schema version: scatacseq-v0 + Directory schema version: scatacseq-v0.0 ``` \ No newline at end of file diff --git a/examples/dataset-iec-examples/good-example/README.md b/examples/dataset-iec-examples/good-example/README.md index cffa46dbd..d0d1a76ec 100644 --- a/examples/dataset-iec-examples/good-example/README.md +++ b/examples/dataset-iec-examples/good-example/README.md @@ -7,6 +7,6 @@ TSVs: metadata.tsv: Schema: scatacseq-v0 Metadata schema version: scatacseq-v0 - Directory schema version: scatacseq-v0 + Directory schema version: scatacseq-v0.0 ``` \ No newline at end of file diff --git a/src/ingest_validation_tools/upload.py b/src/ingest_validation_tools/upload.py index 109a01e86..15e1a4b96 100644 --- a/src/ingest_validation_tools/upload.py +++ b/src/ingest_validation_tools/upload.py @@ -263,6 +263,9 @@ def _get_directory_errors(self) -> dict: dir_errors = self._check_data_path( self.multi_parent, Path(self.multi_parent.path), data_path ) + for schema in self.effective_tsv_paths.values(): + if not schema.dataset_type == self.multi_parent.dataset_type: + schema.dir_schema = self.multi_parent.dir_schema if dir_errors: errors.update(dir_errors) else: From 288ad64515830389a0b0272a80586ce61aa863ed Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 25 Mar 2024 14:37:06 -0400 Subject: [PATCH 11/16] testing around codex --- .../good-codex-akoya-directory-v1.json/README.md | 12 ++++++++++++ .../fixtures.json | 0 .../upload/antibodies.tsv | 0 .../upload/contributors.tsv | 0 .../upload/dataset-1/drv_something/channelNames.txt | 0 .../upload/dataset-1/drv_something/config.txt | 0 
.../upload/dataset-1/drv_something/experiment.json | 0 .../dataset-1/drv_something/exposure_times.txt | 0 .../dataset-1/drv_something/originally-ppt.pdf | Bin .../processed_xyz/anything-goes-here.txt | 0 .../dataset-1/drv_something/segmentation.json | 0 .../extras/dir-schema-v1-with-dataset-json | 0 .../upload/dataset-1/src_something/channelnames.txt | 0 .../dataset-1/src_something/channelnames_report.csv | 0 .../src_something/cycX_regX_X/X_X_ZX_CHX.tif | 0 .../upload/dataset-1/src_something/dataset.json | 0 .../upload/dataset-1/src_something/experiment.json | 0 .../dataset-1/src_something/exposure_times.txt | 0 .../dataset-1/src_something/segmentation.json | 0 .../upload/name-just-needs-to-end-with-metadata.tsv | 0 src/ingest_validation_tools/schema_loader.py | 2 +- src/ingest_validation_tools/upload.py | 2 +- src/ingest_validation_tools/validation_utils.py | 2 +- 23 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/fixtures.json (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/antibodies.tsv (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/contributors.tsv (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/drv_something/channelNames.txt (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/drv_something/config.txt (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/drv_something/experiment.json (100%) rename 
examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/drv_something/exposure_times.txt (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/drv_something/originally-ppt.pdf (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/drv_something/processed_xyz/anything-goes-here.txt (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/drv_something/segmentation.json (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/extras/dir-schema-v1-with-dataset-json (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/src_something/channelnames.txt (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/src_something/channelnames_report.csv (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/src_something/cycX_regX_X/X_X_ZX_CHX.tif (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/src_something/dataset.json (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/src_something/experiment.json (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/src_something/exposure_times.txt (100%) rename 
examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/dataset-1/src_something/segmentation.json (100%) rename examples/dataset-examples/{good-codex-akoya-directory-v1-with-dataset.json => good-codex-akoya-directory-v1.json}/upload/name-just-needs-to-end-with-metadata.tsv (100%) diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md b/examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md new file mode 100644 index 000000000..928ab97ea --- /dev/null +++ b/examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md @@ -0,0 +1,12 @@ +``` +No errors! +Time: WILL_CHANGE +Git version: WILL_CHANGE +Directory: examples/dataset-examples/good-codex-akoya-directory-v1.json/upload +TSVs: + name-just-needs-to-end-with-metadata.tsv: + Schema: codex-v1 + Metadata schema version: codex-v1 + Directory schema version: examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1 + +``` \ No newline at end of file diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/fixtures.json b/examples/dataset-examples/good-codex-akoya-directory-v1.json/fixtures.json similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/fixtures.json rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/fixtures.json diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/antibodies.tsv b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/antibodies.tsv similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/antibodies.tsv rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/antibodies.tsv diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/contributors.tsv 
b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/contributors.tsv similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/contributors.tsv rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/contributors.tsv diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/channelNames.txt b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/channelNames.txt similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/channelNames.txt rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/channelNames.txt diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/config.txt b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/config.txt similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/config.txt rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/config.txt diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/experiment.json b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/experiment.json similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/experiment.json rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/experiment.json diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/exposure_times.txt 
b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/exposure_times.txt similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/exposure_times.txt rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/exposure_times.txt diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/originally-ppt.pdf b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/originally-ppt.pdf similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/originally-ppt.pdf rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/originally-ppt.pdf diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/processed_xyz/anything-goes-here.txt b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/processed_xyz/anything-goes-here.txt similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/processed_xyz/anything-goes-here.txt rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/processed_xyz/anything-goes-here.txt diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/segmentation.json b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/segmentation.json similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/drv_something/segmentation.json rename to 
examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/drv_something/segmentation.json diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/extras/dir-schema-v1-with-dataset-json b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/extras/dir-schema-v1-with-dataset-json similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/extras/dir-schema-v1-with-dataset-json rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/extras/dir-schema-v1-with-dataset-json diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/channelnames.txt b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/channelnames.txt similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/channelnames.txt rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/channelnames.txt diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/channelnames_report.csv b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/channelnames_report.csv similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/channelnames_report.csv rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/channelnames_report.csv diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/cycX_regX_X/X_X_ZX_CHX.tif b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/cycX_regX_X/X_X_ZX_CHX.tif 
similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/cycX_regX_X/X_X_ZX_CHX.tif rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/cycX_regX_X/X_X_ZX_CHX.tif diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/dataset.json b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/dataset.json similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/dataset.json rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/dataset.json diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/experiment.json b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/experiment.json similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/experiment.json rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/experiment.json diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/exposure_times.txt b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/exposure_times.txt similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/exposure_times.txt rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/exposure_times.txt diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/segmentation.json 
b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/segmentation.json similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/dataset-1/src_something/segmentation.json rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1/src_something/segmentation.json diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/name-just-needs-to-end-with-metadata.tsv b/examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/name-just-needs-to-end-with-metadata.tsv similarity index 100% rename from examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload/name-just-needs-to-end-with-metadata.tsv rename to examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/name-just-needs-to-end-with-metadata.tsv diff --git a/src/ingest_validation_tools/schema_loader.py b/src/ingest_validation_tools/schema_loader.py index 2191cc19e..75c019db3 100644 --- a/src/ingest_validation_tools/schema_loader.py +++ b/src/ingest_validation_tools/schema_loader.py @@ -263,7 +263,7 @@ def get_directory_schema( def get_possible_directory_schemas(dir_schema: str) -> Optional[Dict]: schemas = {} # this assumes that versions are numbered starting at x.0, no whole numbers - directory_schema_minor_versions = _directory_schemas_path.glob(f"{dir_schema}*.yaml") + directory_schema_minor_versions = list(_directory_schemas_path.glob(f"{dir_schema}*.yaml")) if not directory_schema_minor_versions: return None for directory_schema_path in directory_schema_minor_versions: diff --git a/src/ingest_validation_tools/upload.py b/src/ingest_validation_tools/upload.py index 15e1a4b96..f5f3b74a2 100644 --- a/src/ingest_validation_tools/upload.py +++ b/src/ingest_validation_tools/upload.py @@ -657,7 +657,7 @@ def _check_data_path( ).popitem() if type(ref_errors[1]) is list: errors[ - f"{str(metadata_path)}, column 
'data_path', value '{path_value}' (as {Path(ref_errors[0]).stem})" + f"{str(metadata_path)}, column 'data_path', value '{path_value}' (as {ref_errors[0]})" ] = ref_errors[1] schema_version.dir_schema = ref_errors[0] return errors diff --git a/src/ingest_validation_tools/validation_utils.py b/src/ingest_validation_tools/validation_utils.py index b4375313e..cf46bd61c 100644 --- a/src/ingest_validation_tools/validation_utils.py +++ b/src/ingest_validation_tools/validation_utils.py @@ -163,7 +163,7 @@ def get_data_dir_errors( possible_schemas = get_possible_directory_schemas(dir_schema) if possible_schemas is None: - return {"Undefined directory schema": dir_schema} + return {dir_schema: ["No matching directory schemas found."]} # Collect errors, discard if schema validates against a minor version errors = [] From 5271f10a0c75ff3364218b34b5095f306d8815ab Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 25 Mar 2024 15:35:13 -0400 Subject: [PATCH 12/16] mocked up good codex data to test, will need to re-test when assayclassifier is updated --- .../bad-cedar-dir-histology/README.md | 4 +-- .../README.md | 2 +- .../README.md | 4 +-- .../fixtures.json | 2 +- .../dataset-examples/bad-codex-data/README.md | 29 +++++++++---------- .../bad-codex-data/fixtures.json | 2 +- .../bad-missing-data/README.md | 4 +-- .../bad-missing-data/fixtures.json | 2 +- .../dataset-examples/bad-mixed/fixtures.json | 2 +- .../bad-scatacseq-data/README.md | 2 +- .../bad-scrnaseq-v0/README.md | 2 +- .../bad-tsv-formats/README.md | 4 +-- .../bad-tsv-formats/fixtures.json | 2 +- .../README.md | 12 -------- .../README.md | 4 +-- .../fixtures.json | 2 +- .../good-codex-akoya-metadata-v1/README.md | 4 +-- .../fixtures.json | 2 +- .../bad-example/README.md | 2 +- .../fixtures.json | 2 +- tests-manual/update_test_data.py | 2 +- tests/test_dataset_examples.py | 16 ++++++---- 22 files changed, 49 insertions(+), 58 deletions(-) delete mode 100644 
examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md diff --git a/examples/dataset-examples/bad-cedar-dir-histology/README.md b/examples/dataset-examples/bad-cedar-dir-histology/README.md index a4977e423..136b5a6b6 100644 --- a/examples/dataset-examples/bad-cedar-dir-histology/README.md +++ b/examples/dataset-examples/bad-cedar-dir-histology/README.md @@ -2,10 +2,10 @@ Upload Errors: Directory Errors: ? examples/dataset-examples/bad-cedar-dir-histology/upload/bad-histology-metadata.tsv, - column 'data_path', value './dataset-1' (as histology-v2) + column 'data_path', value './dataset-1' (as histology-v2.0) : - 'No such file or directory: examples/dataset-examples/bad-cedar-dir-histology/upload/dataset-1' ? examples/dataset-examples/bad-cedar-dir-histology/upload/bad-histology-metadata.tsv, - column 'data_path', value './wrong' (as histology-v2) + column 'data_path', value './wrong' (as histology-v2.0) : - Not allowed: - not-allowed. Required but missing: diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md index 71687dba7..2f457cfec 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md @@ -2,7 +2,7 @@ Upload Errors: Directory Errors: ? examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/upload/good-visium-assay-metadata.tsv, - column 'data_path', value './Visium_9OLC_A4_S1' (as visium-no-probes-v2) + column 'data_path', value './Visium_9OLC_A4_S1' (as visium-no-probes-v2.0) : - Required but missing: - lab_processed\/.*. - lab_processed\/images\/.*. 
diff --git a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md index 838f0a2c8..421b385fa 100644 --- a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md +++ b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/README.md @@ -2,10 +2,10 @@ Upload Errors: Directory Errors: ? examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/upload/name-just-needs-to-end-with-metadata.tsv, - column 'data_path', value 'dataset-1' (as codex-v1-with-dataset-json) + column 'data_path', value 'dataset-1' (as codex-v1.1) : - Required but missing: - (raw|src_[^/]*)/dataset\.json. Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' -``` \ No newline at end of file +``` diff --git a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/fixtures.json b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/fixtures.json index c62bb2160..fe40f56eb 100644 --- a/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/fixtures.json +++ b/examples/dataset-examples/bad-codex-akoya-directory-v1-missing-dataset.json/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} diff --git a/examples/dataset-examples/bad-codex-data/README.md 
b/examples/dataset-examples/bad-codex-data/README.md index 737eeb04e..aba9b56ac 100644 --- a/examples/dataset-examples/bad-codex-data/README.md +++ b/examples/dataset-examples/bad-codex-data/README.md @@ -20,20 +20,19 @@ Upload Errors: - "Decode Error: Invalid ascii because ordinal not in range(128): \"mber\tconjugated_tag\n\ \ [ \xF0 ] \x9F\x98\x83\t\tbad-value\t\t\tinv\"." Directory Errors: - ? examples/dataset-examples/bad-codex-data/upload/codex-metadata.tsv, column 'data_path', - value 'dataset-1' (as codex-v1-with-dataset-json) - : - Not allowed: - - channelnames.txt. - - cyc002_reg001_200216_112537/bad. - - experiment.json. - - exposure_times.txt. - - segmentation.json. - Required but missing: - - (processed|drv_[^/]*)/.*. - - (raw|src_.*)/.*. - - (raw|src_.*)/[cC]yc.*_reg.*/.*_Z.*_CH.*\.tif. - - (raw|src_[^/]*)/dataset\.json. - - extras/dir-schema-v1-with-dataset-json. + examples/dataset-examples/bad-codex-data/upload/codex-metadata.tsv, column 'data_path', value 'dataset-1' (as codex-v1.1): + - Not allowed: + - channelnames.txt. + - cyc002_reg001_200216_112537/bad. + - experiment.json. + - exposure_times.txt. + - segmentation.json. + Required but missing: + - (processed|drv_[^/]*)/.*. + - (raw|src_.*)/.*. + - (raw|src_.*)/[cC]yc.*_reg.*/.*_Z.*_CH.*\.tif. + - (raw|src_[^/]*)/dataset\.json. + - extras/dir-schema-v1-with-dataset-json. Metadata TSV Validation Errors: Local Validation Errors: examples/dataset-examples/bad-codex-data/upload/codex-metadata.tsv (as codex-v0): @@ -48,4 +47,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' 
-``` \ No newline at end of file +``` diff --git a/examples/dataset-examples/bad-codex-data/fixtures.json b/examples/dataset-examples/bad-codex-data/fixtures.json index f600cc588..8245bed8d 100644 --- a/examples/dataset-examples/bad-codex-data/fixtures.json +++ b/examples/dataset-examples/bad-codex-data/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} diff --git a/examples/dataset-examples/bad-missing-data/README.md b/examples/dataset-examples/bad-missing-data/README.md index 9d71f2529..e84d9c7e5 100644 --- a/examples/dataset-examples/bad-missing-data/README.md +++ b/examples/dataset-examples/bad-missing-data/README.md @@ -9,7 +9,7 @@ Upload Errors: : - 'File does not exist: examples/dataset-examples/bad-missing-data/upload/antibodies-missing.tsv.' Directory Errors: ? examples/dataset-examples/bad-missing-data/upload/codex-metadata.tsv, column - 'data_path', value 'dataset-1' (as codex-v1-with-dataset-json) + 'data_path', value 'dataset-1' (as codex-v1.1) : - 'No such file or directory: examples/dataset-examples/bad-missing-data/upload/dataset-1' Metadata TSV Validation Errors: Local Validation Errors: @@ -22,4 +22,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' 
-``` \ No newline at end of file +``` diff --git a/examples/dataset-examples/bad-missing-data/fixtures.json b/examples/dataset-examples/bad-missing-data/fixtures.json index f600cc588..8245bed8d 100644 --- a/examples/dataset-examples/bad-missing-data/fixtures.json +++ b/examples/dataset-examples/bad-missing-data/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} diff --git a/examples/dataset-examples/bad-mixed/fixtures.json b/examples/dataset-examples/bad-mixed/fixtures.json index ebc936201..8f25e1000 100644 --- a/examples/dataset-examples/bad-mixed/fixtures.json +++ b/examples/dataset-examples/bad-mixed/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}, "SNARE-seq2": {"assaytype": "SNARE-ATACseq2", "contains-pii": true, "dataset-type": "ATACseq", "description": "snATACseq (SNARE-seq2)", "dir-schema": "scatacseq-v0", "primary": true, "tbl-schema": "scatacseq-v0", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}, "SNARE-seq2": {"assaytype": "SNARE-ATACseq2", "contains-pii": true, "dataset-type": "ATACseq", "description": "snATACseq (SNARE-seq2)", "dir-schema": 
"scatacseq-v0", "primary": true, "tbl-schema": "scatacseq-v0", "vitessce-hints": []}}, "validation": {}} diff --git a/examples/dataset-examples/bad-scatacseq-data/README.md b/examples/dataset-examples/bad-scatacseq-data/README.md index 4d42fa5ca..c52a960a3 100644 --- a/examples/dataset-examples/bad-scatacseq-data/README.md +++ b/examples/dataset-examples/bad-scatacseq-data/README.md @@ -5,7 +5,7 @@ Upload Errors: - 'Expected a TSV, but found a directory: examples/dataset-examples/bad-scatacseq-data/upload.' Directory Errors: ? examples/dataset-examples/bad-scatacseq-data/upload/scatacseq-metadata.tsv, - column 'data_path', value 'dataset-1' (as scatacseq-v0) + column 'data_path', value 'dataset-1' (as scatacseq-v0.0) : - Not allowed: - not-the-file-you-are-looking-for.txt. - unexpected-directory/place-holder.txt. diff --git a/examples/dataset-examples/bad-scrnaseq-v0/README.md b/examples/dataset-examples/bad-scrnaseq-v0/README.md index 94d29986a..4d5a44af6 100644 --- a/examples/dataset-examples/bad-scrnaseq-v0/README.md +++ b/examples/dataset-examples/bad-scrnaseq-v0/README.md @@ -1,7 +1,7 @@ ``` Upload Errors: Directory Errors: - examples/dataset-examples/bad-scrnaseq-v0/upload/metadata.tsv, column 'data_path', value 'data' (as scrnaseq-v0): + examples/dataset-examples/bad-scrnaseq-v0/upload/metadata.tsv, column 'data_path', value 'data' (as scrnaseq-v0.0): - 'No such file or directory: examples/dataset-examples/bad-scrnaseq-v0/upload/data.' Metadata TSV Validation Errors: Local Validation Errors: diff --git a/examples/dataset-examples/bad-tsv-formats/README.md b/examples/dataset-examples/bad-tsv-formats/README.md index a87df3a82..6c99a86b2 100644 --- a/examples/dataset-examples/bad-tsv-formats/README.md +++ b/examples/dataset-examples/bad-tsv-formats/README.md @@ -7,7 +7,7 @@ Upload Errors: - 'File does not exist: examples/dataset-examples/bad-tsv-formats/upload/antibodies.tsv.' Directory Errors: ? 
examples/dataset-examples/bad-tsv-formats/upload/codex-metadata.tsv, column - 'data_path', value 'dataset-1/' (as codex-v1-with-dataset-json) + 'data_path', value 'dataset-1/' (as codex-v1.1) : - Not allowed: - channelnames.txt. - cyc002_reg001_200216_112537/1_00001_Z001_CH1.tif. @@ -75,4 +75,4 @@ Metadata TSV Validation Errors: Hint: 'If validation fails because of extra whitespace in the TSV, try: src/cleanup_whitespace.py --tsv_in original.tsv --tsv_out clean.tsv.' -``` \ No newline at end of file +``` diff --git a/examples/dataset-examples/bad-tsv-formats/fixtures.json b/examples/dataset-examples/bad-tsv-formats/fixtures.json index f600cc588..8245bed8d 100644 --- a/examples/dataset-examples/bad-tsv-formats/fixtures.json +++ b/examples/dataset-examples/bad-tsv-formats/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md b/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md deleted file mode 100644 index b1c13f59a..000000000 --- a/examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/README.md +++ /dev/null @@ -1,12 +0,0 @@ -``` -No errors! 
-Time: WILL_CHANGE -Git version: WILL_CHANGE -Directory: examples/dataset-examples/good-codex-akoya-directory-v1-with-dataset.json/upload -TSVs: - name-just-needs-to-end-with-metadata.tsv: - Schema: codex-v1 - Metadata schema version: codex-v1 - Directory schema version: codex-v1-with-dataset-json - -``` \ No newline at end of file diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md b/examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md index 928ab97ea..69dd3140d 100644 --- a/examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md +++ b/examples/dataset-examples/good-codex-akoya-directory-v1.json/README.md @@ -7,6 +7,6 @@ TSVs: name-just-needs-to-end-with-metadata.tsv: Schema: codex-v1 Metadata schema version: codex-v1 - Directory schema version: examples/dataset-examples/good-codex-akoya-directory-v1.json/upload/dataset-1 + Directory schema version: codex-v1.1 -``` \ No newline at end of file +``` diff --git a/examples/dataset-examples/good-codex-akoya-directory-v1.json/fixtures.json b/examples/dataset-examples/good-codex-akoya-directory-v1.json/fixtures.json index c62bb2160..fe40f56eb 100644 --- a/examples/dataset-examples/good-codex-akoya-directory-v1.json/fixtures.json +++ b/examples/dataset-examples/good-codex-akoya-directory-v1.json/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} diff --git a/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md 
b/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md index 14144ea43..090d21bd1 100644 --- a/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md +++ b/examples/dataset-examples/good-codex-akoya-metadata-v1/README.md @@ -7,6 +7,6 @@ TSVs: name-just-needs-to-end-with-metadata.tsv: Schema: codex-v1 Metadata schema version: codex-v1 - Directory schema version: codex-v1-with-dataset-json + Directory schema version: codex-v1.1 -``` \ No newline at end of file +``` diff --git a/examples/dataset-examples/good-codex-akoya-metadata-v1/fixtures.json b/examples/dataset-examples/good-codex-akoya-metadata-v1/fixtures.json index c62bb2160..fe40f56eb 100644 --- a/examples/dataset-examples/good-codex-akoya-metadata-v1/fixtures.json +++ b/examples/dataset-examples/good-codex-akoya-metadata-v1/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} diff --git a/examples/dataset-iec-examples/bad-example/README.md b/examples/dataset-iec-examples/bad-example/README.md index 85c50ceb8..383782ffe 100644 --- a/examples/dataset-iec-examples/bad-example/README.md +++ b/examples/dataset-iec-examples/bad-example/README.md @@ -4,7 +4,7 @@ Upload Errors: examples/dataset-iec-examples/bad-example/upload/metadata.tsv, column 'contributors_path', value 'extras/contributors.tsv': Schema version is deprecated: contributors-v0 Directory Errors: - examples/dataset-iec-examples/bad-example/upload/metadata.tsv, column 'data_path', value '.' 
(as scatacseq-v0): + examples/dataset-iec-examples/bad-example/upload/metadata.tsv, column 'data_path', value '.' (as scatacseq-v0.0): - Not allowed: - should-not-be-here.txt. Metadata TSV Validation Errors: diff --git a/examples/plugin-tests/prev-gen-codex-expected-failure/fixtures.json b/examples/plugin-tests/prev-gen-codex-expected-failure/fixtures.json index c62bb2160..fe40f56eb 100644 --- a/examples/plugin-tests/prev-gen-codex-expected-failure/fixtures.json +++ b/examples/plugin-tests/prev-gen-codex-expected-failure/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v1", "vitessce-hints": []}}, "validation": {}} diff --git a/tests-manual/update_test_data.py b/tests-manual/update_test_data.py index 5bf40b1bc..63915b321 100644 --- a/tests-manual/update_test_data.py +++ b/tests-manual/update_test_data.py @@ -207,7 +207,7 @@ def manual_test(test_dir: Union[str, List], verbose: bool = False): assert Path( test_dir ).resolve(), f"Arg {test_dir} passed to manual_test is not a directory!" 
- elif type(test_dir) is list: + elif type(test_dir) is list and len(test_dir) > 1: test_dir = [dir for dir in test_dir if Path(dir).is_dir()] test = TestDatasetExamples() setattr(test, "dataset_test_dirs", test_dir) diff --git a/tests/test_dataset_examples.py b/tests/test_dataset_examples.py index dd6148899..9b3b33809 100644 --- a/tests/test_dataset_examples.py +++ b/tests/test_dataset_examples.py @@ -33,7 +33,6 @@ class MockException(Exception): def __init__(self, error): super().__init__(error) - def dataset_test(test_dir: str, dataset_opts: Dict, verbose: bool = False): dataset_opts = dataset_opts | {"verbose": verbose} print(f"Testing {test_dir}...") @@ -135,7 +134,6 @@ def _assaytype_side_effect(path: str, row: Dict, *args, **kwargs): class TestDatasetExamples(unittest.TestCase): - dataset_paths = {} dataset_test_dirs = [ test_dir for test_dir in [ @@ -151,17 +149,23 @@ def setUp(self): self.get_paths() def tearDown(self): - errors = "\n".join([str(error) for error in self.errors]) + error_lines = "\n".join([str(error) for error in self.errors]) + errors = " ".join([str(error) for error in self.errors]) try: self.assertEqual([], self.errors) except AssertionError: print( - f"""-------ERRORS------- - {errors} + f""" + -------ERRORS------- + {error_lines} + + Run for more detailed output: + env PYTHONPATH=src:$PYTHONPATH python -m tests-manual.update_test_data -t {errors} --verbose --globus_token "" --manual_test --dry_run """ ) def get_paths(self): + self.dataset_paths = {} for test_dir in self.dataset_test_dirs: metadata_paths = [path for path in Path(f"{test_dir}/upload").glob("*metadata.tsv")] self.dataset_paths[test_dir] = metadata_paths @@ -196,7 +200,7 @@ def test_validate_dataset_examples(self, verbose: bool = False): continue except AssertionError as e: print(e) - self.errors.append(e) + self.errors.append(test_dir) continue if len(tsv_paths) == 1: self.single_dataset_assert(tsv_paths[0], mock_assaytype_data) From 
e6929134197789cd1214cb95a8933888a83f9a79 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 25 Mar 2024 15:38:23 -0400 Subject: [PATCH 13/16] linter fix --- src/ingest_validation_tools/docs_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ingest_validation_tools/docs_utils.py b/src/ingest_validation_tools/docs_utils.py index b05ad8ea1..4eb8bfb88 100644 --- a/src/ingest_validation_tools/docs_utils.py +++ b/src/ingest_validation_tools/docs_utils.py @@ -118,7 +118,9 @@ def _get_portal_names_md(assay_types): return f'In the portal: {" / ".join(links)}' -def generate_readme_md(table_schemas, pipeline_infos, directory_schemas, schema_name, is_assay=True): +def generate_readme_md( + table_schemas, pipeline_infos, directory_schemas, schema_name, is_assay=True +): int_keys = [int(k) for k in table_schemas.keys()] max_version = max(int_keys) min_version = min(int_keys) From 5b9e8ad03f40ab44f6d818df1c0b57c72f72fb4f Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Tue, 26 Mar 2024 12:28:27 -0400 Subject: [PATCH 14/16] updated testing --- tests/fixtures.py | 123 +++++++++++++++++++++++++++++++++ tests/test_dataset_examples.py | 118 +++++++++++++++++++++++-------- 2 files changed, 211 insertions(+), 30 deletions(-) create mode 100644 tests/fixtures.py diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 000000000..d9dece45a --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,123 @@ +SCATACSEQ_HIGHER_VERSION_VALID = { + "test-schema-v0.0": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": True, + "description": "Folder for general lab-specific files related to the dataset. 
[Exists in all assays]", + }, + ] + }, + "test-schema-v0.1": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": False, + "description": "Folder for general lab-specific files related to the dataset. [Exists in all assays]", + }, + ] + }, +} + +SCATACSEQ_LOWER_VERSION_VALID = { + "test-schema-v0.0": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": False, + "description": "Folder for general lab-specific files related to the dataset. [Exists in all assays]", + }, + ] + }, + "test-schema-v0.1": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": True, + "description": "Folder for general lab-specific files related to the dataset. [Exists in all assays]", + }, + ] + }, +} + +SCATACSEQ_NEITHER_VERSION_VALID = { + "test-schema-v0.0": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": True, + "description": "Folder for general lab-specific files related to the dataset. [Exists in all assays]", + }, + ] + }, + "test-schema-v0.1": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": True, + "description": "Folder for general lab-specific files related to the dataset. 
[Exists in all assays]", + }, + ] + }, +} + +SCATACSEQ_BOTH_VERSIONS_VALID = { + "test-schema-v0.0": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": False, + "description": "Folder for general lab-specific files related to the dataset. [Exists in all assays]", + }, + ] + }, + "test-schema-v0.1": { + "files": [ + { + "pattern": "[^/]+\\.fastq\\.gz", + "description": "Compressed FastQ file", + "required": True, + }, + { + "pattern": "extras\\/.*", + "required": False, + "description": "Folder for general lab-specific files related to the dataset. [Exists in all assays]", + }, + ] + }, +} diff --git a/tests/test_dataset_examples.py b/tests/test_dataset_examples.py index 9b3b33809..28c53ad31 100644 --- a/tests/test_dataset_examples.py +++ b/tests/test_dataset_examples.py @@ -12,6 +12,13 @@ from ingest_validation_tools.error_report import ErrorReport from ingest_validation_tools.upload import Upload +from .fixtures import ( + SCATACSEQ_BOTH_VERSIONS_VALID, + SCATACSEQ_HIGHER_VERSION_VALID, + SCATACSEQ_LOWER_VERSION_VALID, + SCATACSEQ_NEITHER_VERSION_VALID, +) + SHARED_OPTS = { "encoding": "ascii", "run_plugins": True, @@ -33,6 +40,7 @@ class MockException(Exception): def __init__(self, error): super().__init__(error) + def dataset_test(test_dir: str, dataset_opts: Dict, verbose: bool = False): dataset_opts = dataset_opts | {"verbose": verbose} print(f"Testing {test_dir}...") @@ -242,7 +250,7 @@ def multi_dataset_assert(self, tsv_paths: List[str], mock_assaytype_data: Mock): print(e) self.errors.append(e) - def prep_upload(self, test_dir: str, opts: Dict): + def prep_upload(self, test_dir: str, opts: Dict, patch_data: Dict): with patch( "ingest_validation_tools.validation_utils.get_assaytype_data", side_effect=lambda row, ingest_url: _assaytype_side_effect(test_dir, row, ingest_url), @@ -253,32 +261,82 @@ def prep_upload(self, test_dir: str, opts: Dict): 
schema_name, test_dir, tsv_path, report_type ), ): - upload = Upload(Path(f"{test_dir}/upload"), **opts) - upload.get_errors() - return upload - - # @patch( - # "ingest_validation_tools.schema_loader.get_possible_directory_schemas", - # {"test-schema-v1.0": {}, "test-schema-v1.1": {}}, - # ) - # def test_data_dir_versions_highest_version(self): - # # pick 1 good and 1 bad example dir; assert names (or numbers) of effective TSVs inside - # test_dirs = [] - # for test_dir in test_dirs: - # upload = self.prep_upload(test_dir, DATASET_EXAMPLES_OPTS) - # dir_schemas = upload.get_dir_schema_versions() - # expected_result = {upload.effective_tsv_paths.popitem()[0]: "test-schema-v1.1"} - # self.assertEqual(dir_schemas, expected_result) - # - # @patch( - # "ingest_validation_tools.schema_loader.get_possible_directory_schemas", - # {"test-schema-v1.0": {}, "test-schema-v1.1": {}}, - # ) - # def test_data_dir_versions_lower_version(self): - # # pick 1 good and 1 bad example dir; assert names (or numbers) of effective TSVs inside - # test_dirs = [] - # for test_dir in test_dirs: - # upload = self.prep_upload(test_dir, DATASET_EXAMPLES_OPTS) - # dir_schemas = upload.get_dir_schema_versions() - # expected_result = {upload.effective_tsv_paths.popitem()[0]: "test-schema-v1.0"} - # self.assertEqual(dir_schemas, expected_result) + with patch( + "ingest_validation_tools.validation_utils.get_possible_directory_schemas", + ) as dir_schemas_func_patch: + dir_schemas_func_patch.return_value = patch_data + upload = Upload(Path(f"{test_dir}/upload"), **opts) + upload.get_errors() + return upload + + def test_data_dir_versions_highest_version(self): + test_dirs = [ + "examples/dataset-examples/bad-scatacseq-data", + "examples/dataset-examples/good-scatacseq-metadata-v0", + ] + for test_dir in test_dirs: + upload = self.prep_upload( + test_dir, DATASET_EXAMPLES_OPTS, SCATACSEQ_HIGHER_VERSION_VALID + ) + info = upload.get_info() + for path in upload.effective_tsv_paths.keys(): + 
dir_schema_version = ( + info.get("TSVs", {}).get(Path(path).name, {}).get("Directory schema version") + ) + self.assertEqual(dir_schema_version, "test-schema-v0.1") + + def test_data_dir_versions_lower_version(self): + test_dirs = [ + "examples/dataset-examples/bad-scatacseq-data", + "examples/dataset-examples/good-scatacseq-metadata-v0", + ] + test_dirs = [] + for test_dir in test_dirs: + upload = self.prep_upload( + test_dir, DATASET_EXAMPLES_OPTS, SCATACSEQ_LOWER_VERSION_VALID + ) + info = upload.get_info() + for path in upload.effective_tsv_paths.keys(): + dir_schema_version = ( + info.get("TSVs", {}).get(Path(path).name, {}).get("Directory schema version") + ) + self.assertEqual(dir_schema_version, "test-schema-v1.0") + + def test_data_dir_versions_both_versions(self): + test_dirs = [ + "examples/dataset-examples/bad-scatacseq-data", + "examples/dataset-examples/good-scatacseq-metadata-v0", + ] + test_dirs = [] + for test_dir in test_dirs: + upload = self.prep_upload( + test_dir, DATASET_EXAMPLES_OPTS, SCATACSEQ_BOTH_VERSIONS_VALID + ) + info = upload.get_info() + for path in upload.effective_tsv_paths.keys(): + dir_schema_version = ( + info.get("TSVs", {}).get(Path(path).name, {}).get("Directory schema version") + ) + self.assertEqual(dir_schema_version, "test-schema-v0.1") + + def test_data_dir_versions_neither_version(self): + test_dirs = [ + "examples/dataset-examples/bad-scatacseq-data", + "examples/dataset-examples/good-scatacseq-metadata-v0", + ] + test_dirs = [] + for test_dir in test_dirs: + upload = self.prep_upload( + test_dir, DATASET_EXAMPLES_OPTS, SCATACSEQ_NEITHER_VERSION_VALID + ) + info = upload.get_info() + for path in upload.effective_tsv_paths.keys(): + dir_schema_version = ( + info.get("TSVs", {}).get(Path(path).name, {}).get("Directory schema version") + ) + self.assertEqual(dir_schema_version, None) + + +# if __name__ == "__main__": +# suite = unittest.TestLoader().loadTestsFromTestCase(TestDatasetExamples) +# suite.debug() From 
5cc3e8b8ca094109c925401f081e180004f806ed Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Tue, 26 Mar 2024 13:02:41 -0400 Subject: [PATCH 15/16] fixing logic --- src/generate_docs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/generate_docs.py b/src/generate_docs.py index 5c88482ec..598d02d23 100755 --- a/src/generate_docs.py +++ b/src/generate_docs.py @@ -78,9 +78,8 @@ def main(): for v, schema in directory_schemas.items(): try: - a = float(v) - assert a >= 2.0 - except AssertionError: + assert float(v) >= 2.0 + except (AssertionError, ValueError): deprecated["directories"][v] = schema else: current["directories"][v] = schema From ab43f4c95ba3df96b6e0d6b17d0a89a617298cac Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Tue, 9 Apr 2024 13:29:29 -0400 Subject: [PATCH 16/16] updating tests, added command line arg to test using DEV --- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../fixtures.json | 2 +- .../bad-repeated/fixtures.json | 2 +- .../good-cedar-multi-assay-visium/README.md | 2 +- .../fixtures.json | 2 +- .../expected-failure/fixtures.json | 2 +- tests-manual/update_test_data.py | 37 ++++++++++++++++++- tests/test_dataset_examples.py | 17 +++++++-- 10 files changed, 57 insertions(+), 13 deletions(-) diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json index 33127e6ed..0fa0740d9 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-child-metadata/fixtures.json @@ -1 +1 @@ -{"assaytype": {"RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}, "Visium (no probes)": {"assaytype": 
"visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {"scRNAseq-10xGenomics-v3": {"URL Errors": ["On row 3, column \"parent_sample_id\", value \"\" fails because of error \"HTTPError\": 404 Client Error: Not Found for url: https://entity.api.hubmapconsortium.org/entities/"], "Validation Errors": ["On row 1, column \"parent_sample_id\", value \"\" fails because of error \"missingRequired\"", "On row 2, column \"preparation_protocol_doi\", value \"wrong\" fails because of error \"invalidUrl\""]}, "contributors": null, "visium-no-probes": null, "h-and-e": null}} +{"assaytype": {"RNAseq": {"assaytype": "rnaseq-visium-no-probes", "contains-pii": true, "dataset-type": "RNAseq", "description": "Capture bead RNAseq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}, "Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}}, "validation": {"rnaseq-visium-no-probes": {"URL Errors": ["On row 3, column \"parent_sample_id\", value \"\" fails because of error \"HTTPError\": 404 Client Error: Not Found for url: https://entity.api.hubmapconsortium.org/entities/"], "Validation Errors": ["On row 1, column 
\"parent_sample_id\", value \"\" fails because of error \"missingRequired\"", "On row 2, column \"preparation_protocol_doi\", value \"wrong\" fails because of error \"invalidUrl\""]}, "contributors": null, "visium-no-probes": null, "h-and-e": null}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json index 4c99b98f8..b627ebf31 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10xGenomics-v3": null}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained 
Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "rnaseq-visium-no-probes", "contains-pii": true, "dataset-type": "RNAseq", "description": "Capture bead RNAseq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "rnaseq-visium-no-probes": null}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json index b22ffa40f..4ca1b64e0 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-unreferenced-parent-path/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": 
{"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "rnaseq-visium-no-probes", "contains-pii": true, "dataset-type": "RNAseq", "description": "Capture bead RNAseq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json b/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json index b22ffa40f..4ca1b64e0 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-with-standalone-histology/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, 
"Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "rnaseq-visium-no-probes", "contains-pii": true, "dataset-type": "RNAseq", "description": "Capture bead RNAseq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {}} \ No newline at end of file diff --git a/examples/dataset-examples/bad-repeated/fixtures.json b/examples/dataset-examples/bad-repeated/fixtures.json index f600cc588..a67942bbf 100644 --- a/examples/dataset-examples/bad-repeated/fixtures.json +++ b/examples/dataset-examples/bad-repeated/fixtures.json @@ -1 +1 @@ -{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1-with-dataset-json", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file +{"assaytype": {"CODEX": {"assaytype": "CODEX", "contains-pii": false, "dataset-type": "CODEX", "description": "CODEX", "dir-schema": "codex-v1", "primary": true, "tbl-schema": "codex-v0", "vitessce-hints": []}}, "validation": {}} \ No newline at end of file diff --git a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md index f7aea6138..b17bc6df5 100644 --- a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md +++ b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md @@ -13,7 +13,7 @@ TSVs: Metadata schema version: '2' Directory schema version: visium-no-probes-v2.0 good-visium-rnaseq-metadata.tsv: - Schema: scRNAseq-10xGenomics-v3-v2 + Schema: rnaseq-visium-no-probes-v2 Metadata schema version: '2' Directory schema version: visium-no-probes-v2.0 diff --git a/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json 
b/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json index 4c99b98f8..b627ebf31 100644 --- a/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json +++ b/examples/dataset-examples/good-cedar-multi-assay-visium/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10xGenomics-v3": null}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "rnaseq-visium-no-probes", "contains-pii": true, "dataset-type": "RNAseq", "description": "Capture bead RNAseq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "rnaseq-visium-no-probes": null}} \ No newline 
at end of file diff --git a/examples/plugin-tests/expected-failure/fixtures.json b/examples/plugin-tests/expected-failure/fixtures.json index 4c99b98f8..b627ebf31 100644 --- a/examples/plugin-tests/expected-failure/fixtures.json +++ b/examples/plugin-tests/expected-failure/fixtures.json @@ -1 +1 @@ -{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "scRNAseq-10xGenomics-v3", "contains-pii": true, "dataset-type": "RNAseq", "description": "scRNA-seq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, "scRNAseq-10xGenomics-v3": null}} \ No newline at end of file +{"assaytype": {"Visium (no probes)": {"assaytype": "visium-no-probes", "contains-pii": true, "dataset-type": "Visium (no probes)", "description": "Visium (no probes)", "dir-schema": "visium-no-probes-v2", "is-multi-assay": true, "must-contain": ["Histology", "RNAseq"], "primary": true, "vitessce-hints": []}, "Histology": {"assaytype": "h-and-e", "contains-pii": false, "dataset-type": "Histology", "description": "H&E Stained Microscopy", "dir-schema": "histology-v2", "primary": true, "vitessce-hints": []}, "RNAseq": {"assaytype": "rnaseq-visium-no-probes", "contains-pii": true, "dataset-type": "RNAseq", "description": "Capture bead RNAseq (10x Genomics v3)", "dir-schema": "rnaseq-v2", "primary": true, "vitessce-hints": []}}, "validation": {"visium-no-probes": null, "contributors": null, "h-and-e": null, 
"rnaseq-visium-no-probes": null}} \ No newline at end of file diff --git a/tests-manual/update_test_data.py b/tests-manual/update_test_data.py index a0702675e..37e942303 100644 --- a/tests-manual/update_test_data.py +++ b/tests-manual/update_test_data.py @@ -20,6 +20,7 @@ TestDatasetExamples, clean_report, dataset_test, + dev_url_replace, diff_test, ) @@ -35,6 +36,7 @@ def __init__( verbose: bool = False, dry_run: bool = True, full_diff: bool = False, + env: str = "PROD", ): self.dir = dir self.globus_token = globus_token @@ -44,6 +46,7 @@ def __init__( self.upload_verbose = True if "plugin-tests" in dir else False self.dry_run = dry_run self.full_diff = full_diff + self.env = env def log(self, verbose_message, short_message: Optional[str] = None): if self.verbose: @@ -73,8 +76,23 @@ def update_test_data(self) -> Dict[str, List]: ) if "fixtures" not in self.exclude: new_data = self.update_fixtures(report) + if self.env == "DEV": + for value in new_data.get("validation", {}).values() or {}: + if value is not None: + new_url_data = [ + dev_url_replace(v) + for v in value.get("URL Errors", []) + if value is not None + ] + if new_url_data: + value["URL Errors"] = new_url_data fixtures = self.open_or_create_fixtures() - diff = DeepDiff(fixtures, new_data, ignore_order=True, report_repetition=True) + diff = DeepDiff( + fixtures, + new_data, + ignore_order=True, + report_repetition=True, + ) if not diff: print(f"No diff found, skipping {self.dir}/fixtures.json...") elif self.dry_run: @@ -105,6 +123,7 @@ def update_test_data(self) -> Dict[str, List]: cleaned_report, verbose=self.verbose, full_diff=self.full_diff, + env=self.env, ) readme.close() print(f"No diff found, skipping {self.dir}/README.md") @@ -232,6 +251,14 @@ def call_update(dir: str, args) -> Dict: } else: opts = {} + if args.env == "DEV": + opts = opts | { + "app_context": { + "ingest_url": "https://ingest-api.dev.hubmapconsortium.org/", + "entities_url": 
"https://entity-api.dev.hubmapconsortium.org/entities/", + "request_header": {"X-Hubmap-Application": "ingest-pipeline"}, + } + } change_report = UpdateData( dir, args.globus_token, @@ -240,6 +267,7 @@ def call_update(dir: str, args) -> Dict: verbose=args.verbose, exclude=args.exclude, full_diff=args.full_diff, + env=args.env, ).update_test_data() return change_report @@ -267,6 +295,7 @@ def call_update(dir: str, args) -> Dict: type=str, ) parser.add_argument( + "-d", "--dry_run", action="store_true", help="Default is False. If specified, do not write data but instead print output.", @@ -296,6 +325,12 @@ def call_update(dir: str, args) -> Dict: action="store_true", help="Default is False. Show full and cleaned README diff.", ) +parser.add_argument( + "--env", + choices=["DEV", "PROD"], + default=["PROD"], + help="Run tests against an env other than PROD by passing dev-specific app_context.", +) args = parser.parse_args() # tsv-examples not currently integrated, could be if needed. diff --git a/tests/test_dataset_examples.py b/tests/test_dataset_examples.py index 28c53ad31..c38c75d39 100644 --- a/tests/test_dataset_examples.py +++ b/tests/test_dataset_examples.py @@ -58,25 +58,34 @@ def dataset_test(test_dir: str, dataset_opts: Dict, verbose: bool = False): def clean_report(report: ErrorReport): clean_report = [] - regex = re.compile(r"((Time|Git version): )(.*)") + will_change_regex = re.compile(r"((Time|Git version): )(.*)") for line in report.as_md().splitlines(keepends=True): - match = regex.search(line) - if match: - new_line = line.replace(match.group(3), "WILL_CHANGE") + will_change_match = will_change_regex.search(line) + if will_change_match: + new_line = line.replace(will_change_match.group(3), "WILL_CHANGE") clean_report.append(new_line) else: clean_report.append(line) return "".join(clean_report) +def dev_url_replace(report: str): + dev_regex = re.compile(r"-api.dev") + report = re.sub(dev_regex, ".api", report) + return report + + def diff_test( 
test_dir: str, readme: TextIOWrapper, report: str, verbose: bool = True, full_diff: bool = False, + env: str = "PROD", ): d = difflib.Differ() + if env == "DEV": + report = dev_url_replace(report) diff = list(d.compare(readme.readlines(), report.splitlines(keepends=True))) readme.close() ignore_strings = ["Time:", "Git version:", "```"]