WIP: support eventual "quiet" use through SPP
AmandaBirmingham committed Sep 16, 2024
1 parent 07e5d4b commit 8403915
Showing 3 changed files with 44 additions and 15 deletions.
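In outline, this commit splits the existing write_extended_metadata_from_df into a "quiet" core, write_quiet_extended_metadata_from_df, which hands validation messages back to the caller and can skip the timestamp prefix on the output filename, while the original function keeps its current behavior as a thin wrapper around it. A rough caller-side sketch of the intended quiet usage, based only on the signatures in this diff (the DataFrame, config dict, paths, and "txt" extension below are placeholders, and the eventual SPP integration is not part of this commit):

    import pandas as pd
    from qiimp import write_quiet_extended_metadata_from_df

    raw_df = pd.DataFrame()   # placeholder; real input needs the required raw-metadata columns
    study_config = {}         # placeholder; real input is the study-specific config dict

    # Quiet mode: no separate validation-messages CSV and no timestamp in the
    # output filename; the caller decides what to do with the returned messages.
    extended_df, validation_msgs = write_quiet_extended_metadata_from_df(
        raw_df, study_config, "output_dir", "my_study", "txt",
        use_timestamp=False)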
7 changes: 4 additions & 3 deletions qiimp/__init__.py
@@ -5,7 +5,8 @@
     FUNCTION_KEY, PRE_TRANSFORMERS_KEY, POST_TRANSFORMERS_KEY, \
     extract_config_dict, deepcopy_dict, load_df_with_best_fit_encoding
 from qiimp.src.metadata_extender import \
-    write_extended_metadata, write_extended_metadata_from_df
+    write_extended_metadata, write_extended_metadata_from_df, \
+    write_quiet_extended_metadata_from_df
 from qiimp.src.metadata_merger import merge_sample_and_subject_metadata
 from qiimp.src.metadata_transformers import format_a_datetime

@@ -19,8 +20,8 @@
            "extract_config_dict",
            "deepcopy_dict", "load_df_with_best_fit_encoding",
            "merge_sample_and_subject_metadata",
-           "write_extended_metadata",
-           "write_extended_metadata_from_df",
+           "write_extended_metadata", "write_extended_metadata_from_df",
+           "write_quiet_extended_metadata_from_df",
            "format_a_datetime"]
 
 from . import _version
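Because both the old and new names are exported at the package top level, existing "from qiimp import write_extended_metadata_from_df" callers are untouched, and quiet callers should be able to do the analogous import once this lands:

    from qiimp import write_quiet_extended_metadata_from_df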
3 changes: 1 addition & 2 deletions qiimp/src/metadata_configurator.py
@@ -1,6 +1,5 @@
 from typing import Dict, Optional
-from qiimp.src.util import extract_config_dict, extract_stds_config, \
-    deepcopy_dict, \
+from qiimp.src.util import extract_stds_config, deepcopy_dict, \
     METADATA_FIELDS_KEY, STUDY_SPECIFIC_METADATA_KEY, \
     HOST_TYPE_SPECIFIC_METADATA_KEY, \
     SAMPLE_TYPE_SPECIFIC_METADATA_KEY, ALIAS_KEY, BASE_TYPE_KEY, \
49 changes: 39 additions & 10 deletions qiimp/src/metadata_extender.py
@@ -79,9 +79,42 @@ def write_extended_metadata_from_df(
         study_specific_transformers_dict=None, sep="\t",
         suppress_empty_fails=False, internal_col_names=None):
 
+    out_ext = get_extension(sep)
+    metadata_df, validation_msgs = write_quiet_extended_metadata_from_df(
+        raw_metadata_df, study_specific_config_dict, out_dir, out_name_base,
+        out_ext, sep=sep, suppress_empty_fails=suppress_empty_fails,
+        study_specific_transformers_dict=study_specific_transformers_dict,
+        internal_col_names=internal_col_names, use_timestamp=True)
+    output_validation_msgs(validation_msgs, out_dir, out_name_base, sep=",",
+                           suppress_empty_fails=suppress_empty_fails)
+    return metadata_df
+
+
+def write_quiet_extended_metadata_from_df(
+        raw_metadata_df, study_specific_config_dict, out_dir, out_name_base,
+        out_ext, study_specific_transformers_dict=None, sep="\t",
+        suppress_empty_fails=False, internal_col_names=None,
+        use_timestamp=True):
+
     if internal_col_names is None:
         internal_col_names = INTERNAL_COL_KEYS
 
+    metadata_df, validation_msgs = _extend_metadata_from_df(
+        raw_metadata_df, study_specific_config_dict,
+        study_specific_transformers_dict=study_specific_transformers_dict)
+
+    _output_to_df(metadata_df, out_dir, out_name_base, out_ext,
+                  internal_col_names, sep=sep, remove_internals=True,
+                  suppress_empty_fails=suppress_empty_fails,
+                  use_timestamp=use_timestamp)
+
+    return metadata_df, validation_msgs
+
+
+def _extend_metadata_from_df(
+        raw_metadata_df, study_specific_config_dict,
+        study_specific_transformers_dict=None):
+
     validate_required_columns_exist(
         raw_metadata_df, REQUIRED_RAW_METADATA_FIELDS,
         "metadata missing required columns")
@@ -105,12 +138,7 @@ def write_extended_metadata_from_df(
         raw_metadata_df, study_specific_transformers_dict,
         study_specific_config_dict)
 
-    _output_to_df(metadata_df, out_dir, out_name_base,
-                  internal_col_names, remove_internals=True, sep=sep,
-                  suppress_empty_fails=suppress_empty_fails)
-    output_validation_msgs(validation_msgs, out_dir, out_name_base, sep=",",
-                           suppress_empty_fails=suppress_empty_fails)
-    return metadata_df
+    return metadata_df, validation_msgs
 
 
 def _populate_metadata_df(
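Taken together, the two hunks above turn the original public function into a thin wrapper, so the two paths below should end up equivalent. This is an illustrative sketch rather than code from the commit: output_validation_msgs and get_extension are the existing helpers already referenced in metadata_extender.py (their defining modules are not shown in this diff), and raw_df/study_config stand in for real inputs.

    import pandas as pd
    from qiimp.src.metadata_extender import (
        get_extension, output_validation_msgs,
        write_extended_metadata_from_df, write_quiet_extended_metadata_from_df)

    raw_df, study_config = pd.DataFrame(), {}   # placeholders for real inputs

    # Path A: original "loud" call -- writes the extended-metadata file plus a
    # separate validation-messages CSV, both with timestamped names.
    extended_df = write_extended_metadata_from_df(
        raw_df, study_config, "output_dir", "my_study")

    # Path B: what Path A now does internally -- the quiet core, followed by an
    # explicit write of the validation messages.
    extended_df, validation_msgs = write_quiet_extended_metadata_from_df(
        raw_df, study_config, "output_dir", "my_study", get_extension("\t"),
        use_timestamp=True)
    output_validation_msgs(validation_msgs, "output_dir", "my_study", sep=",")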
@@ -360,12 +388,11 @@ def _fill_na_if_default(metadata_df, specific_dict, settings_dict):
     return metadata_df
 
 
-def _output_to_df(a_df, out_dir, out_base, internal_col_names,
+def _output_to_df(a_df, out_dir, out_base, out_ext, internal_col_names,
                   sep="\t", remove_internals=False,
-                  suppress_empty_fails=False):
+                  suppress_empty_fails=False, use_timestamp=True):
 
     timestamp_str = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
-    extension = get_extension(sep)
 
     # sort columns alphabetically
     a_df = a_df.reindex(sorted(a_df.columns), axis=1)
@@ -398,7 +425,9 @@ def _output_to_df(a_df, out_dir, out_base, internal_col_names,
         col_names.insert(0, col_names.pop(col_names.index(SAMPLE_NAME_KEY)))
     output_df = a_df.loc[:, col_names].copy()
 
-    out_fp = os.path.join(out_dir, f"{timestamp_str}_{out_base}.{extension}")
+    out_fname = f"{out_base}.{out_ext}"
+    out_fname = f"{timestamp_str}_{out_fname}" if use_timestamp else out_fname
+    out_fp = os.path.join(out_dir, out_fname)
     output_df.to_csv(out_fp, sep=sep, index=False)
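With out_ext supplied by the caller and the timestamp prefix now optional, the output filename logic reduces to the few lines below. A standalone illustration with made-up values (the real code takes timestamp_str from datetime.now()):

    out_base, out_ext = "my_study_metadata", "txt"
    timestamp_str = "2024-09-16_13-05-00"   # illustrative; real value comes from datetime.now()

    for use_timestamp in (True, False):
        out_fname = f"{out_base}.{out_ext}"
        out_fname = f"{timestamp_str}_{out_fname}" if use_timestamp else out_fname
        print(out_fname)
    # -> 2024-09-16_13-05-00_my_study_metadata.txt
    # -> my_study_metadata.txt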

