-
Notifications
You must be signed in to change notification settings - Fork 187
/
dvc.yaml
75 lines (75 loc) · 1.62 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Named artifacts declared for this project. Each entry is keyed by the
# artifact name and points at a path in the workspace.
artifacts:
  # Raw input data; the same path is listed as a dependency of the
  # `prepare` stage.
  stackoverflow-dataset:
    path: data/data.xml
    type: dataset
    desc: Initial XML StackOverflow dataset (raw data)
  # Trained model; the same path is listed as an output of the `train` stage.
  text-classification:
    path: model.pkl
    desc: Detect whether the given stackoverflow question should have R language tag
    type: model
    labels:
      - nlp
      - classification
      - stackoverflow
# Pipeline stages. Each stage is keyed by name and declares the command it
# runs (`cmd`), the files it depends on (`deps`), the parameters it reads
# (`params`, where present) and the paths it produces (`outs`).
# The stages chain through their paths: prepare -> featurize -> train -> evaluate.
stages:
  # Reads data/data.xml and produces data/prepared.
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
      - data/data.xml
      - src/prepare.py
    params:
      - prepare.seed
      - prepare.split
    outs:
      - data/prepared
  # Consumes data/prepared and produces data/features.
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
      - data/prepared
      - src/featurization.py
    params:
      - featurize.max_features
      - featurize.ngrams
    outs:
      - data/features
  # Consumes data/features and produces model.pkl.
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
      - data/features
      - src/train.py
    params:
      - train.min_split
      - train.n_est
      - train.seed
    outs:
      - model.pkl
  # Consumes model.pkl and data/features and produces the `eval` path;
  # this stage declares no params.
  evaluate:
    cmd: python src/evaluate.py model.pkl data/features
    deps:
      - data/features
      - model.pkl
      - src/evaluate.py
    outs:
      - eval
# Metrics files for the project. The path sits under `eval`, which is the
# declared output of the `evaluate` stage.
metrics:
  - eval/metrics.json
# Plot definitions. Each named entry selects a template, the field used for
# the x axis, and a `y` mapping from data file to the field taken from that
# file. Every named plot lists a train.json and a test.json source.
# The last entry is a bare path to an image file instead of a named plot.
plots:
  - ROC:
      template: simple
      x: fpr
      y:
        eval/plots/sklearn/roc/train.json: tpr
        eval/plots/sklearn/roc/test.json: tpr
  - Confusion-Matrix:
      template: confusion
      x: actual
      y:
        eval/plots/sklearn/cm/train.json: predicted
        eval/plots/sklearn/cm/test.json: predicted
  - Precision-Recall:
      template: simple
      x: recall
      y:
        eval/plots/sklearn/prc/train.json: precision
        eval/plots/sklearn/prc/test.json: precision
  - eval/plots/images/importance.png