-
Notifications
You must be signed in to change notification settings - Fork 7
/
metadata.py
57 lines (45 loc) · 1.92 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import logging
from typing import Optional
import numpy as np
import os
import pandas as pd
def read_metadata_table(path: str) -> Optional[pd.DataFrame]:
    """Read a subtype metadata table into a Pandas DataFrame.

    This table must have a column labeled `subtype` and have one of the following file extensions
    (matched case-insensitively):

    - `.tab` - tab-delimited file
    - `.tsv` - tab-delimited file
    - `.csv` - comma-separated values file

    The top row must be the header row.

    After reading, missing values in the `subtype` column are replaced with the string '#N/A' and
    the whole column is converted to `str`.

    Args:
        path: File path of table.

    Returns:
        DataFrame of table file if `path` is one of the acceptable file formats, otherwise, return `None`
        (an error is logged in that case).

    Raises:
        AssertionError: If the table has no column named `subtype`.
    """
    FILE_EXT_TO_PD_READ_FUNC = {
        '.tab': pd.read_table,
        '.tsv': pd.read_table,
        '.csv': pd.read_csv,
    }
    _, file_ext = os.path.splitext(os.path.basename(path))
    file_ext = file_ext.lower()
    if file_ext not in FILE_EXT_TO_PD_READ_FUNC:
        logging.error('File extension of metadata file "{}" not one of the expected "{}"'.format(
            path,
            list(FILE_EXT_TO_PD_READ_FUNC.keys())
        ))
        return None

    dfmd: pd.DataFrame = FILE_EXT_TO_PD_READ_FUNC[file_ext](path)

    # Raise explicitly rather than using an `assert` statement: asserts are removed when Python
    # runs with -O, which would let a table without `subtype` slip through. AssertionError is
    # kept as the exception type so existing callers that catch it keep working.
    if 'subtype' not in dfmd.columns:
        raise AssertionError('Column with name "subtype" expected in metadata file "{}"'.format(path))

    # Build the normalised column and assign it back by key. Calling `fillna(..., inplace=True)`
    # on `dfmd.subtype` mutates a temporary Series and does not reach the DataFrame when pandas
    # Copy-on-Write is active. Masking after the str conversion also avoids writing a string
    # into a numeric column.
    subtype_col = dfmd['subtype']
    dfmd['subtype'] = subtype_col.astype(str).mask(subtype_col.isna(), '#N/A')

    logging.info('Read scheme metadata file "{}" into DataFrame with shape {}'.format(path, dfmd.shape))
    return dfmd
def merge_results_with_metadata(df_results: pd.DataFrame, df_metadata: pd.DataFrame) -> pd.DataFrame:
    """Attach subtype metadata columns to a subtyping results table.

    The two tables are joined on their shared `subtype` column using a left join, so every row of
    `df_results` is kept; rows whose subtype has no entry in `df_metadata` get missing values in
    the metadata columns.

    Args:
        df_results: Subtyping results table.
        df_metadata: Subtype metadata table.

    Returns:
        Subtyping results with the matching subtype metadata merged in.
    """
    join_key = 'subtype'
    merged = pd.merge(
        left=df_results,
        right=df_metadata,
        on=join_key,
        how='left',
    )
    return merged