Skip to content

Commit

Permalink
Merge pull request #45 from microbiomedata/19-super-issue-updates-to-…
Browse files Browse the repository at this point in the history
…mutts-for-emsl-export

Resolve super issue #19: Updates to MUTTs for EMSL export
  • Loading branch information
Natalie-Winans authored Jan 14, 2025
2 parents 9803813 + f8e3114 commit ad12664
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
11 changes: 8 additions & 3 deletions etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,14 @@ def retrieve_metadata_records(self, unique_field: str) -> pd.DataFrame:
df["collection_month"] = df["collection_date"].str.split("-").str[1]
df["collection_day"] = df["collection_date"].str.split("-").str[2]

df["collection_month_name"] = df["collection_month"].apply(
lambda x: calendar.month_name[int(x)]
)
# Safely map collection_month to month_name (account for NaN values)
def get_month_name(month):
    """Return the full month name for ``month``, or "" when it is not a valid month.

    ``month`` may be anything ``int()`` accepts (e.g. "01", 7). Values that
    cannot be converted (NaN, None, non-numeric text, infinities) and
    numbers outside 1..12 all yield an empty string instead of raising.
    """
    try:
        month_number = int(month)
    except (ValueError, TypeError, OverflowError):
        # NaN -> ValueError, None -> TypeError, +/-inf -> OverflowError
        return ""
    # Explicit range check: indexing calendar.month_name directly would raise
    # IndexError for values > 12 and silently wrap for negatives
    # (e.g. -1 -> "December").
    if not 1 <= month_number <= 12:
        return ""
    return calendar.month_name[month_number]

df["collection_month_name"] = df["collection_month"].apply(get_month_name)

return df

Expand Down
12 changes: 6 additions & 6 deletions input-files/emsl_header.json
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"1": "Total amount of sample sent to EMSL, include units",
"2": "{value}{text}",
"header": "shipped_sample_size",
"sub_port_mapping": "shipped_sample_size"
"sub_port_mapping": "sample_shipped"
},
"Collection Date": {
"1": "The date of sampling, either as an instance (single point) or interval.",
Expand All @@ -81,7 +81,7 @@
"1": "Detail how your sample was stored",
"2": "{YYYY-MM-DD}",
"header": "storage_condt",
"sub_port_mapping": "storage_condt"
"sub_port_mapping": "store_cond"
},
"Other Storage Condition": {
"1": "Please specify if \"other\"",
Expand All @@ -93,7 +93,7 @@
"1": "Details what temperature samples should be stored at",
"2": "enumeration",
"header": "samp_store_temp",
"sub_port_mapping": "samp_store_temp"
"sub_port_mapping": "emsl_store_temp"
},
"Other Storage temperature": {
"1": "Please specify if \"other\"",
Expand Down Expand Up @@ -123,13 +123,13 @@
"1": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed.",
"2": "{text}|{termLabel} {[termID]}",
"header": "sample_processing",
"sub_port_mapping": "sample_processing"
"sub_port_mapping": "samp_mat_process"
},
"sample collection method": {
"1": "The method employed for collecting the sample. This can be a citation or description",
"2": "{PMID}|{DOI}|{URL}|{text}",
"header": "sample_collection_method",
"sub_port_mapping": "sample_collection_method"
"sub_port_mapping": "samp_collec_method"
},
"depth": {
"1": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples.",
Expand All @@ -141,7 +141,7 @@
"1": "The device used to collect an environmental sample. Include dimensions of device if applicable",
"2": "{text} {dimensions}",
"header": "sample_collection_dev",
"sub_port_mapping": "sample_collection_dev"
"sub_port_mapping": "samp_collec_device"
},
"Sieve Size": {
"1": "Collection design of pooled samples and/or sieve size and amount of sample sieved",
Expand Down

0 comments on commit ad12664

Please sign in to comment.