-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathrun_inference.py
95 lines (69 loc) · 3.71 KB
/
run_inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import argparse
from pathlib import Path
from pprint import pprint
from typing import Literal
from inference_pipeline.inference import InferenceCfg, inference_pipeline, FetchFromCacheCfg
from utils.azure_storage import download_meeting_subset, download_models
from utils.conf import load_yaml_to_dataclass, update_dataclass
# names of the inference scenarios that load_config() knows how to build
ConfigName = Literal[
    'full_dev_set_mc',
    'full_dev_set_sc',
    'dev_set_mc_debug',
]
def get_project_root() -> Path:
    """ Returns the folder that contains this file, which serves as the project root """
    this_file = Path(__file__)
    return this_file.parent
def load_config(config_name: ConfigName) -> InferenceCfg:
    """ Builds the inference config for the given scenario name.

    Every scenario picks a yaml file (relative to the project root) and a session
    query string. The yaml is loaded into an ``InferenceCfg`` and the scenario's
    query is then stored in ``cfg.session_query``.

    Args:
        config_name: name of the scenario, one of the ``ConfigName`` values.

    Returns:
        The loaded ``InferenceCfg`` with ``session_query`` set to the scenario's query.

    Raises:
        ValueError: if ``config_name`` is not a known scenario.
        AssertionError: if the yaml file already sets ``session_query``, since it
            would otherwise be silently overwritten by the scenario's query.
    """
    # scenario name -> (yaml path relative to the project root, session query)
    scenarios = {
        # all multi-channel (MC) dev-set sessions
        'full_dev_set_mc': ('configs/inference/inference_v1.yaml',
                            "is_mc == True"),  # filter only MC
        # all single-channel (SC) dev-set sessions
        'full_dev_set_sc': ('configs/inference/inference_v1.yaml',
                            "is_mc == False"),  # filter only SC
        # for quick debug: 'tiny' Whisper, one MC (multi-channel) session
        'dev_set_mc_debug': ('configs/inference/debug_inference.yaml',
                             'device_name == "plaza_0" and is_mc == True and meeting_id == "MTG_30860"'),
    }
    try:
        conf_rel_path, session_query = scenarios[config_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which the original if/elif chain also
        # reported as an unknown name
        raise ValueError(f'unknown config name: {config_name}') from None

    conf_file = get_project_root() / conf_rel_path

    # no scenario overrides yaml fields at the moment; the empty dict is still passed
    # through update_dataclass so the loading path is unchanged
    updates = {}

    cfg: InferenceCfg = load_yaml_to_dataclass(str(conf_file), InferenceCfg)
    cfg = update_dataclass(cfg, updates)

    # Explicit raise instead of `assert`: an assert is stripped under `python -O`, which
    # would let the yaml's session_query be overwritten without any error. The exception
    # type and message are the same as the original assert produced.
    if cfg.session_query is not None:
        raise AssertionError('overriding session_query from yaml')
    cfg.session_query = session_query

    return cfg
def main(config_name: ConfigName = 'dev_set_mc_debug', output_dir: str = ""):
    """ Downloads the dev-set and the models, then runs the inference pipeline.

    Args:
        config_name: scenario name, forwarded to load_config().
        output_dir: base folder for the outputs; an empty string means the project root.
            Results are written under <base>/artifacts/outputs/<experiment name>.

    Raises:
        RuntimeError: if download_meeting_subset() returns None.
    """
    root = get_project_root()
    cfg: InferenceCfg = load_config(config_name)
    artifacts_dir = root / 'artifacts'

    # fetch the whole dev-set (every session, both multi-channel and single-channel)
    dev_meetings_dir = download_meeting_subset(
        subset_name='dev_set',
        version='240825.1_dev1',  # dev-set-1, GT included
        destination_dir=str(artifacts_dir / 'meeting_data'),
    )
    if dev_meetings_dir is None:
        raise RuntimeError('failed to download benchmark dataset')

    # fetch the models
    models_dir = artifacts_dir / 'css_models'
    download_models(destination_dir=str(models_dir))

    # every module writes its outputs below this folder
    base_dir = Path(output_dir) if output_dir != "" else root
    cache_cfg = FetchFromCacheCfg()  # no cache, use this at your own risk.

    # experiment name: front-end mode followed by the ASR model name
    if cfg.css.pass_through_ch0:
        front_end = 'pass_through'
    else:
        front_end = 'css'
    exp_name = front_end + '_' + cfg.asr.model_name
    outputs_dir = base_dir / 'artifacts' / 'outputs' / exp_name

    pprint(f'{config_name=}')
    pprint(cfg)

    # run the inference pipeline
    inference_pipeline(
        meetings_dir=str(dev_meetings_dir),
        models_dir=str(models_dir),
        out_dir=str(outputs_dir),
        cfg=cfg,
        cache=cache_cfg,
    )
if __name__ == "__main__":
    # command-line entry point: both flags are optional strings forwarded to main()
    cli = argparse.ArgumentParser(description='Run inference pipeline')

    # (flag, default value, help text)
    flag_specs = (
        ('--config-name', "dev_set_mc_debug",
         'Config scenario for the inference, default: dev_set_mc_debug'),
        ('--output-dir', "",
         'Output directory path, default: ./artifacts/outputs'),
    )
    for flag, default_value, help_text in flag_specs:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    cli_args = cli.parse_args()
    main(config_name=cli_args.config_name, output_dir=cli_args.output_dir)