From 03bf16e8da95af8bb7000e98aaface4a566a52d7 Mon Sep 17 00:00:00 2001 From: j-uranic <117292295+j-uranic@users.noreply.github.com> Date: Thu, 3 Oct 2024 13:42:56 -0400 Subject: [PATCH 1/3] Create visium-no-probes-v3.2.yaml Fix raw\/fastq\/oligo\/ to raw\/fastq\/oligo\/.* (allow all files) --- .../visium-no-probes-v3.2.yaml | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/ingest_validation_tools/directory-schemas/visium-no-probes-v3.2.yaml diff --git a/src/ingest_validation_tools/directory-schemas/visium-no-probes-v3.2.yaml b/src/ingest_validation_tools/directory-schemas/visium-no-probes-v3.2.yaml new file mode 100644 index 00000000..9c98499e --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/visium-no-probes-v3.2.yaml @@ -0,0 +1,86 @@ +files: + - + pattern: extras\/.* + required: True + description: Folder for general lab-specific files related to the dataset + - + pattern: extras\/microscope_hardware\.json + required: True + description: A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: extras\/microscope_settings\.json + required: False + description: A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: raw\/.* + required: True + description: All raw data files for the experiment. + - + pattern: raw\/[^\/]+\.gpr + required: True + description: This is a 10X Genomics layout file that's generated by 10X and individualized for each Visium slide. This is a text file and can be generated using this 10X web form along with the unique 10X Visium slide ID. 
+ is_qa_qc: False + - + pattern: raw\/additional_panels_used\.csv + required: False + description: If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. + - + pattern: raw\/custom_probe_set\.csv + required: False + description: This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). + - + pattern: raw\/fastq\/.* + required: True + description: Raw sequencing files for the experiment + - + pattern: raw\/fastq\/oligo\/.* + required: True + description: Directory containing fastq files pertaining to oligo sequencing. + - + pattern: raw\/fastq\/oligo\/[^\/]+\.fastq\.gz + required: True + description: This is a gzip version of the fastq file. This file contains the cell barcode and unique molecular identifier (technical). + is_qa_qc: False + - + pattern: raw\/images\/.* + required: True + description: Directory containing raw image files. This directory should include at least one raw file. + - + pattern: raw\/images\/[^\/]+_tissue\.(?:tif|tiff) + required: False + description: Raw microscope file for the experiment. For 10X Visium CytAssist, this would be the high resolution image produced. + is_qa_qc: False + - + pattern: raw\/images\/[^\/]+_fiducial\.(?:tif|tiff) + required: True + description: This is the low resolution image from the 10X CytAssist instrument that includes the fiducial markings. 
+ - + pattern: raw\/images\/[^\/]+\.ndpi + required: False + description: Raw microscope file for the experiment + is_qa_qc: False + - + pattern: lab_processed\/.* + required: True + description: Experiment files that were processed by the lab generating the data. + - + pattern: lab_processed\/alignment\.json + required: True + description: JSON file for the manual tissue alignment created using Loupe browser and used as input to Space Ranger. + - + pattern: lab_processed\/images\/.* + required: True + description: Processed image files + - + pattern: lab_processed\/images\/[^\/]+\.ome\.tiff + required: True + description: OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use lossless compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. + is_qa_qc: False + example: lab_processed/images/HBM892.MDXS.293.ome.tiff + - + pattern: lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv + required: True + description: This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. 
The required fields are detailed + is_qa_qc: False From 6c57540ae3438365e8a3b6009b872c0c7cb5a20b Mon Sep 17 00:00:00 2001 From: j-uranic <117292295+j-uranic@users.noreply.github.com> Date: Thu, 3 Oct 2024 13:47:03 -0400 Subject: [PATCH 2/3] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca18d7fc..066a0913 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Update GeoMx NGS directory schema - Update MERFISH directory schema - Update LC-MS directory schema +- Update Visium with probes directory schema ## v0.0.25 - Update GeoMx NGS directory schema From a3cc0647ea911aff211075050843cb6e58df2d79 Mon Sep 17 00:00:00 2001 From: Juan Puerto <=> Date: Thu, 3 Oct 2024 15:48:16 -0400 Subject: [PATCH 3/3] Documentation: Update Visium no probes docs --- docs/visium-no-probes/current/index.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/visium-no-probes/current/index.md b/docs/visium-no-probes/current/index.md index 1a926606..910408d4 100644 --- a/docs/visium-no-probes/current/index.md +++ b/docs/visium-no-probes/current/index.md @@ -30,7 +30,31 @@ REQUIRED - For this assay, you must also prepare and submit two additional metad
## Directory schemas -Version 3.1 (use this one) +Version 3.2 (use this one) + +| pattern | required? | description | +| --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| raw\/.* | ✓ | All raw data files for the experiment. | +| raw\/[^\/]+\.gpr | ✓ | This is a 10X Genomics layout file that's generated by 10X and individualized for each Visium slide. This is a text file and can be generated using this 10X web form along with the unique 10X Visium slide ID. | +| raw\/additional_panels_used\.csv | | If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. | +| raw\/custom_probe_set\.csv | | This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). | +| raw\/fastq\/.* | ✓ | Raw sequencing files for the experiment | +| raw\/fastq\/oligo\/.* | ✓ | Directory containing fastq files pertaining to oligo sequencing. | +| raw\/fastq\/oligo\/[^\/]+\.fastq\.gz | ✓ | This is a gzip version of the fastq file. This file contains the cell barcode and unique molecular identifier (technical). 
| +| raw\/images\/.* | ✓ | Directory containing raw image files. This directory should include at least one raw file. | +| raw\/images\/[^\/]+_tissue\.(?:tif|tiff) | | Raw microscope file for the experiment. For 10X Visium CytAssist, this would be the high resolution image produced. | +| raw\/images\/[^\/]+_fiducial\.(?:tif|tiff) | ✓ | This is the low resolution image from the 10X CytAssist instrument that includes the fiducial markings. | +| raw\/images\/[^\/]+\.ndpi | | Raw microscope file for the experiment | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | +| lab_processed\/alignment\.json | ✓ | JSON file for the manual tissue alignment created using Loupe browser and used as input to Space Ranger. | +| lab_processed\/images\/.* | ✓ | Processed image files | +| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use lossless compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed | + +Version 3.1 | pattern | required? | description | | --- | --- | --- |