Skip to content

Commit

Permalink
no more unexpecteds
Browse files Browse the repository at this point in the history
  • Loading branch information
turbomam committed Oct 28, 2024
1 parent e528d7a commit 4954e08
Show file tree
Hide file tree
Showing 10 changed files with 14 additions and 86 deletions.
4 changes: 3 additions & 1 deletion project.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ local/with_shuttles_yq.yaml: local/with_shuttles.yaml
yq -i '(.classes.[].slot_usage.[] | select(.name=="chem_administration") | .examples) = [{"value": "agar [CHEBI:2509];2018-05-11|agar [CHEBI:2509];2018-05-22"}, {"value": "agar [CHEBI:2509];2018-05"}]' $@

# use yq to add patterns with a secondary condition like multivalued
yq -i '(.classes.[].slot_usage.[] | select(.range == "GeolocationValue") | .pattern) = "^[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?)\s[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)$$"' $@
yq -i '(.classes.[].slot_usage.[] | select(.range == "GeolocationValue") | .pattern) = "^[-+]?([1-8]?\d(\.\d{1,8})?|90(\.0{1,8})?)\s[-+]?(180(\.0{1,8})?|((1[0-7]\d)|([1-9]?\d))(\.\d{1,8})?)$$"' $@
yq -i '(.classes.[].slot_usage.[] | select(.range == "GeolocationValue") | .range) = "string"' $@

yq -i '(.classes.[].slot_usage.[] | select(.range == "QuantityValue") | .pattern) = "^[-+]?[0-9]*\.?[0-9]+ +\S.*$$"' $@
Expand Down Expand Up @@ -209,6 +209,8 @@ local/nmdc.yaml

src/nmdc_submission_schema/schema/nmdc_submission_schema.yaml: local/with_modifications.yaml project/thirdparty/GoldEcosystemTree.json
$(RUN) inject-gold-pathway-terms -g $(word 2,$^) -i $< -o $@
#cp $< $@

# remove the multivalued true annotation from these global slot definitions for the sake of linkml-convert
# esp to tsv? and dumping to SQLite?
# follow the .string_serialization=="{text};{float} {unit}" and .multivalued == true pattern?
Expand Down
2 changes: 2 additions & 0 deletions sheets_and_friends/tsv_in/modifications_long.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ JgiMgInterface|JgiMgLrInterface dna_sample_format replace_attribute required t
JgiMgInterface|JgiMgLrInterface dna_sample_format replace_attribute recommended false
JgiMgInterface|JgiMgLrInterface dna_sample_name replace_attribute required true
JgiMgInterface|JgiMgLrInterface dna_sample_name replace_attribute recommended false
JgiMgInterface|JgiMgLrInterface dna_sample_name replace_attribute pattern ^[_a-zA-Z0-9-]*$

This comment has been minimized.

Copy link
@mslarae13

mslarae13 Dec 24, 2024

Contributor

@turbomam when and why was this regex added? I can't find the PR, and this regex has made valid DNA sample names invalid.

The change of validation and regular expressions on the submission schema without my knowledge needs to change. It's causing issues for submitters.

I'd like to discuss this at the next schema and metadata meeting.

JgiMgInterface|JgiMgLrInterface dna_seq_project replace_attribute required true
JgiMgInterface|JgiMgLrInterface dna_seq_project replace_attribute recommended false
JgiMgInterface|JgiMgLrInterface dna_seq_project_name replace_attribute required true
Expand Down Expand Up @@ -191,6 +192,7 @@ JgiMtInterface rna_sample_format replace_attribute required true
JgiMtInterface rna_sample_format replace_attribute recommended false
JgiMtInterface rna_sample_name replace_attribute required true
JgiMtInterface rna_sample_name replace_attribute recommended false
JgiMtInterface rna_sample_name replace_attribute pattern ^[_a-zA-Z0-9-]*$
JgiMtInterface rna_seq_project replace_attribute required true
JgiMtInterface rna_seq_project replace_attribute recommended false
JgiMtInterface rna_seq_project_name replace_attribute required true
Expand Down
4 changes: 2 additions & 2 deletions sheets_and_friends/tsv_in/validation_converter.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ from_val from_type to_type to_val len
{float}-{float} MIxS string serialization DH pattern regex ^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s*-\s*[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$
{float}|{float}-{float} MIxS string serialization DH pattern regex ^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?(\s*-\s*[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)?$
{integer} MIxS string serialization DH datatype integer 7
{lat lon} linkml string_serialization DH pattern regex ^[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?)\s[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)$ 86
{lat lon} linkml string_serialization DH pattern regex ^[-+]?([1-8]?\d(\.\d{1,8})?|90(\.0{1,8})?)\s[-+]?(180(\.0{1,8})?|((1[0-7]\d)|([1-9]?\d))(\.\d{1,8})?)$ 86
{termLabel} {[termID]} MIxS string serialization DH pattern regex ^\S+.*\S+ \[[a-zA-Z]{2,}:\d+\]$
{termLabel} {[termID]}; {timestamp} MIxS string serialization DH pattern regex ^\S+.*\S+ \[[a-zA-Z]{2,}:\d+\]; ([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$ 314
{termLabel} {[termID]};{timestamp} MIxS string serialization DH pattern regex ^\S+.*\S+ \[[a-zA-Z]{2,}:\d+\];([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$ 313
Expand All @@ -24,7 +24,7 @@ ControlledIdentifiedTermValue linkml range DH datatype string
ControlledTermValue linkml range DH datatype string
ControlledTermValue linkml range DH pattern regex ^\S+.*\S+ \[[a-zA-Z]{2,}:\d+\]$
GeolocationValue linkml range DH datatype string
GeolocationValue linkml range DH pattern regex ^[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?)\s[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)$
GeolocationValue linkml range DH pattern regex ^[-+]?([1-8]?\d(\.\d{1,8})?|90(\.0{1,8})?)\s[-+]?(180(\.0{1,8})?|((1[0-7]\d)|([1-9]?\d))(\.\d{1,8})?)$
OntologyClass linkml range DH datatype string
QuantityValue linkml range DH datatype string
TextValue linkml range DH datatype string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jgi_mg_data:
dna_project_contact: xxx
dna_samp_id: xxx
dna_sample_format: DNAStable
dna_sample_name: DNA:0546789 # contains colon and should not be allowed 'a-z, A-Z, 0-9, - and _ only'
dna_sample_name: DNA:0546789 # description says "Sample names must ... contain a-z, A-Z, 0-9, - and _ only." but there's no pattern constraint
dna_seq_project: xxx
dna_seq_project_name: xxx
dna_seq_project_pi: xxx
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
jgi_mt_data:
- analysis_type:
- metatranscriptomics
- samp_name: sample name
source_mat_id: MPI:012345
dnase_rna: "no"
proposal_rna: '504000'
rna_concentration: 100.3
Expand All @@ -12,9 +12,9 @@ jgi_mt_data:
rna_samp_id: '123456'
rna_sample_format: MDA reaction buffer
rna_sample_name: JGI_lagoon_14343
rna_seq_project: An RNA Sequencing Project # Values will be prefilled by NMDC. Appears to accept any string.
rna_seq_project: '123456789'
rna_seq_project_name: JGI Lagoon metatranscritpomics
rna_seq_project_pi: Patty Smith
rna_volume: 25.1
samp_name: sample name
source_mat_id: MPI:012345
rna_volume: 5.1 # comments say "This form accepts values < 25, but JGI may refuse to process them unless permission has been granted by a project manager" but minimum_value says 0
analysis_type:
- metatranscriptomics

0 comments on commit 4954e08

Please sign in to comment.