-
Notifications
You must be signed in to change notification settings - Fork 1
/
run.py
77 lines (61 loc) · 2.4 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from argparse import ArgumentParser
from os import getcwd
from os.path import join
from pathlib import Path
from src.utils import get_config, get_data_sets
# Project-wide configuration, loaded once at import time.  The file is looked
# up relative to the current working directory (not this script's location),
# so the program is expected to be started from the repository root.
_config_file = join(getcwd(), "config", "config.yml")
CONFIG = get_config(_config_file)
def main(cmd_args):
    """Run the pipeline stages selected on the command line.

    The stages are checked in a fixed order -- download, extract, parse,
    load -- and each one runs only when its flag on ``cmd_args`` is truthy.
    The modules a stage needs are imported inside the branch that uses them.

    Args:
        cmd_args: Parsed command-line namespace.  This function reads its
            ``download``, ``extract``, ``parse``, ``load`` and ``root``
            attributes; the whole namespace is additionally passed to
            ``DatasetParser`` and ``DatasetLoader``.

    Returns:
        None.
    """
    if cmd_args.download or cmd_args.extract:
        import urllib.request

        # ``get_config`` is already imported at module level and is not used
        # here, so only ``get_links`` is pulled in.
        from src.utils import get_links
        from src.dataset_handler import DataSetsHandler

        # Both stages work on the same data-set list, which is built from the
        # links found on the page at CONFIG["data_sets_url"].
        with urllib.request.urlopen(CONFIG["data_sets_url"]) as response:
            imdb_page_content = response.read()

        data_sets = get_data_sets(
            urls=get_links(imdb_page_content, CONFIG), root=Path(cmd_args.root)
        )
        handler = DataSetsHandler(data_sets)

        if cmd_args.download:
            handler.download()
        if cmd_args.extract:
            handler.extract()

    if cmd_args.parse:
        from src.dataset_parser import DatasetParser

        parser = DatasetParser(cmd_args, config=CONFIG)
        parser.parse_dataset()

    if cmd_args.load:
        from src.dataset_loader import DatasetLoader

        loader = DatasetLoader(cmd_args, config=CONFIG)
        # The database is initialised before any data is loaded into it.
        loader.db_init()
        loader.load_dataset()
if __name__ == "__main__":
    # Script entry point: declare the command-line options, parse them, echo
    # the parsed namespace and hand it over to main().
    cli = ArgumentParser()
    cli.add_argument(
        "--root",
        "-r",
        required=True,
        help="Directory where data sets will be downloaded",
    )

    # One on/off switch per pipeline stage handled by main().
    for long_flag, short_flag in (
        ("--download", "-d"),
        ("--extract", "-x"),
        ("--parse", "-p"),
        ("--load", "-l"),
    ):
        cli.add_argument(long_flag, short_flag, action="store_true")

    cli.add_argument(
        "--dburi",
        "-db",
        help="Database URI",
        default=CONFIG["default_database_uri"],
    )
    cli.add_argument(
        "--resume",
        help="Start parsing not from first table",
        default=None,
        choices=["name", "principal", "rating"],
    )

    # Verbosity switches; declared last so the option order stays unchanged.
    for long_flag, short_flag in (("--debug", "-dd"), ("--quiet", "-q")):
        cli.add_argument(long_flag, short_flag, action="store_true")

    args = cli.parse_args()
    print(args)
    main(args)
# TODO: adopt click for a better CLI experience
# TODO: adopt alembic and invoke
# TODO: investigate polling DB operations to report progress
# TODO: migrate tests from unittest to pytest
# TODO: adopt rich for colored terminal output