-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathhooks.py
59 lines (44 loc) · 1.77 KB
/
hooks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import logging
import time
from typing import Callable, Tuple, Any
from kedro.framework.hooks import hook_impl
from kedro.pipeline.node import Node
# Module-level logger used by the hook classes in this file.
log = logging.getLogger(__name__)
class TimeDatasetLoadingHooks:
    """Kedro hooks that log how long each dataset takes to load.

    A start reading is stored in ``before_dataset_loaded`` and consumed in
    ``after_dataset_loaded``, keyed by dataset name.
    """

    def __init__(self):
        # Maps dataset name -> perf_counter() reading taken right before its load.
        self._start_times = {}

    @hook_impl
    def before_dataset_loaded(self, dataset_name: str) -> None:
        """Record the moment loading of ``dataset_name`` begins."""
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        self._start_times[dataset_name] = time.perf_counter()

    @hook_impl
    def after_dataset_loaded(self, dataset_name: str) -> None:
        """Log the time elapsed since the matching ``before_dataset_loaded``.

        Raises:
            KeyError: If no start reading was recorded for ``dataset_name``.
        """
        # pop() removes the entry once it has been used so stale readings
        # do not accumulate in the dict.
        elapsed_time = time.perf_counter() - self._start_times.pop(dataset_name)
        # ".3f" prints three decimal places. A bare ".3" means three
        # significant digits and falls back to exponent notation for very
        # short or very long loads.
        log.info(f"Loading `{dataset_name}` took {elapsed_time:.3f} seconds")
import inspect
import pandas as pd
class InspectHooks:
    """Hooks that log where node functions are defined and the shape of loaded DataFrames."""

    @hook_impl
    def before_node_run(self, node: Node) -> None:
        """Log the source location and line count of ``node.func``.

        Nodes carrying the ``no_inspect`` tag are skipped.
        """
        if "no_inspect" not in node.tags:
            node_name = node.name
            location, number_lines = _inspect_func(node.func)
            message = (
                f"`{node_name}` is defined at {location} and is {number_lines} lines long"
            )
            log.info(message)

    @hook_impl
    def after_dataset_loaded(self, dataset_name: str, data: Any) -> None:
        """Log the shape of ``data`` when it is a pandas ``DataFrame``."""
        if not isinstance(data, pd.DataFrame):
            # Only DataFrames are reported; anything else is ignored.
            return
        message = f"{dataset_name} has shape {data.shape}"
        log.info(message)
def _inspect_func(func: Callable) -> Tuple[str, int]:
    """Gives the location (file and line number) and number of lines in `func`.

    Args:
        func: The callable to inspect. Wrappers that expose ``__wrapped__``
            (e.g. created with ``functools.wraps``) are unwrapped first, so
            the innermost function is the one reported.

    Returns:
        A ``(location, number_of_lines)`` tuple, where ``location`` has the
        form ``"<source file>:<first line number>"``.

    Raises:
        TypeError: If ``func`` is a built-in object without Python source.
        OSError: If the source code cannot be retrieved.
    """
    # inspect.getsourcelines() follows ``__wrapped__`` chains while
    # inspect.getsourcefile() does not. Unwrapping once up front keeps the
    # reported file and line number pointing at the same function.
    target = inspect.unwrap(func)
    source_file = inspect.getsourcefile(target)
    lines, first_line = inspect.getsourcelines(target)
    location = f"{source_file}:{first_line}"
    return location, len(lines)