From ed51f26a7b670087b5554935dd776fae93d1fbe5 Mon Sep 17 00:00:00 2001 From: mattseddon <37993418+mattseddon@users.noreply.github.com> Date: Fri, 21 Oct 2022 06:40:34 +1100 Subject: [PATCH] Add survival exp show test fixture (#2638) * add survival test fixture (https://dagshub.com/kingabzpro/kaggle-titanic-dvc) * add survival story --- .../src/experiments/columns/model.test.ts | 8 + extension/src/experiments/model/index.test.ts | 10 + .../test/fixtures/expShow/survival/columns.ts | 1607 +++++++++++++++++ .../test/fixtures/expShow/survival/output.ts | 1232 +++++++++++++ .../test/fixtures/expShow/survival/rows.ts | 1098 +++++++++++ .../fixtures/expShow/survival/tableData.ts | 19 + webview/src/stories/Table.stories.tsx | 6 + 7 files changed, 3980 insertions(+) create mode 100644 extension/src/test/fixtures/expShow/survival/columns.ts create mode 100644 extension/src/test/fixtures/expShow/survival/output.ts create mode 100644 extension/src/test/fixtures/expShow/survival/rows.ts create mode 100644 extension/src/test/fixtures/expShow/survival/tableData.ts diff --git a/extension/src/experiments/columns/model.test.ts b/extension/src/experiments/columns/model.test.ts index 46ad35c186..b09fab983c 100644 --- a/extension/src/experiments/columns/model.test.ts +++ b/extension/src/experiments/columns/model.test.ts @@ -17,6 +17,8 @@ import deeplyNestedColumnsFixture from '../../test/fixtures/expShow/deeplyNested import deeplyNestedOutputFixture from '../../test/fixtures/expShow/deeplyNested/output' import dataTypesColumnsFixture from '../../test/fixtures/expShow/dataTypes/columns' import dataTypesOutputFixture from '../../test/fixtures/expShow/dataTypes/output' +import survivalOutputFixture from '../../test/fixtures/expShow/survival/output' +import survivalColumnsFixture from '../../test/fixtures/expShow/survival/columns' import { getConfigValue } from '../../vscode/config' jest.mock('../../vscode/config') @@ -38,6 +40,12 @@ describe('ColumnsModel', () => { 
expect(model.getSelected()).toStrictEqual(columnsFixture) }) + it('should return the expected columns when given the survival output fixture', async () => { + const model = new ColumnsModel('', buildMockMemento()) + await model.transformAndSet(survivalOutputFixture) + expect(model.getSelected()).toStrictEqual(survivalColumnsFixture) + }) + it('should return the expected columns when given the deeply nested output fixture', async () => { const model = new ColumnsModel('', buildMockMemento()) await model.transformAndSet(deeplyNestedOutputFixture) diff --git a/extension/src/experiments/model/index.test.ts b/extension/src/experiments/model/index.test.ts index 9300d64fc0..b4b1c8c881 100644 --- a/extension/src/experiments/model/index.test.ts +++ b/extension/src/experiments/model/index.test.ts @@ -15,6 +15,9 @@ import { Experiment, ColumnType } from '../webview/contract' import { definedAndNonEmpty } from '../../util/array' import dataTypesRowsFixture from '../../test/fixtures/expShow/dataTypes/rows' import dataTypesOutputFixture from '../../test/fixtures/expShow/dataTypes/output' +import survivalOutputFixture from '../../test/fixtures/expShow/survival/output' +import survivalRowsFixture from '../../test/fixtures/expShow/survival/rows' + import { ExperimentStatus } from '../../cli/dvc/contract' jest.mock('vscode') @@ -74,6 +77,13 @@ describe('ExperimentsModel', () => { expect(model.getRowData()).toStrictEqual(rowsFixture) }) + it('should return the expected rows when given the survival fixture', () => { + const model = new ExperimentsModel('', buildMockMemento()) + model.transformAndSet(survivalOutputFixture) + + expect(model.getRowData()).toStrictEqual(survivalRowsFixture) + }) + // eslint-disable-next-line sonarjs/cognitive-complexity it('should handle a new dep file being introduced in the workspace', () => { const newDep = join('data', '.ldb_workspace') diff --git a/extension/src/test/fixtures/expShow/survival/columns.ts 
b/extension/src/test/fixtures/expShow/survival/columns.ts new file mode 100644 index 0000000000..7fa01c7be9 --- /dev/null +++ b/extension/src/test/fixtures/expShow/survival/columns.ts @@ -0,0 +1,1607 @@ +import { timestampColumn } from '../../../../experiments/columns/constants' +import { + buildDepPath, + buildMetricOrParamPath +} from '../../../../experiments/columns/paths' +import { Column, ColumnType } from '../../../../experiments/webview/contract' +import { join } from '../../../util/path' + +const data: Column[] = [ + timestampColumn, + { + hasChildren: true, + label: join('results', 'metrics.json'), + parentPath: ColumnType.METRICS, + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + type: ColumnType.METRICS + }, + { + hasChildren: false, + label: 'fit_time', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'fit_time' + ), + pathArray: [ + ColumnType.METRICS, + join('results', 'metrics.json'), + 'fit_time' + ], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.6337410688400269, + minNumber: 0.6337410688400269 + }, + { + hasChildren: false, + label: 'score_time', + maxStringLength: 19, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'score_time' + ), + pathArray: [ + ColumnType.METRICS, + join('results', 'metrics.json'), + 'score_time' + ], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.07778854370117187, + minNumber: 0.07778854370117187 + }, + { + hasChildren: false, + label: 'accuracy', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 
'metrics.json'), + 'accuracy' + ), + pathArray: [ + ColumnType.METRICS, + join('results', 'metrics.json'), + 'accuracy' + ], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.8293632958801498, + minNumber: 0.8293632958801498 + }, + { + hasChildren: false, + label: 'balanced_accuracy', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'balanced_accuracy' + ), + pathArray: [ + ColumnType.METRICS, + join('results', 'metrics.json'), + 'balanced_accuracy' + ], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.8040020654726536, + minNumber: 0.8040020654726536 + }, + { + hasChildren: false, + label: 'f1', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'f1' + ), + pathArray: [ColumnType.METRICS, join('results', 'metrics.json'), 'f1'], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.7572265847252886, + minNumber: 0.7572265847252886 + }, + { + hasChildren: false, + label: 'gmpr', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'gmpr' + ), + pathArray: [ColumnType.METRICS, join('results', 'metrics.json'), 'gmpr'], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.7615174102573903, + minNumber: 0.7615174102573903 + }, + { + hasChildren: false, + label: 'jaccard', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'jaccard' + ), + pathArray: [ColumnType.METRICS, join('results', 
'metrics.json'), 'jaccard'], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.6113136909663465, + minNumber: 0.6113136909663465 + }, + { + hasChildren: false, + label: 'precision', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'precision' + ), + pathArray: [ + ColumnType.METRICS, + join('results', 'metrics.json'), + 'precision' + ], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.8361572183378356, + minNumber: 0.8361572183378356 + }, + { + hasChildren: false, + label: 'recall', + maxStringLength: 17, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'recall' + ), + pathArray: [ColumnType.METRICS, join('results', 'metrics.json'), 'recall'], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.695546218487395, + minNumber: 0.695546218487395 + }, + { + hasChildren: false, + label: 'roc_auc', + maxStringLength: 18, + parentPath: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json') + ), + path: buildMetricOrParamPath( + ColumnType.METRICS, + join('results', 'metrics.json'), + 'roc_auc' + ), + pathArray: [ColumnType.METRICS, join('results', 'metrics.json'), 'roc_auc'], + type: ColumnType.METRICS, + types: ['number'], + maxNumber: 0.8703211951447246, + minNumber: 0.8703211951447246 + }, + { + hasChildren: true, + label: 'params.yaml', + parentPath: ColumnType.PARAMS, + path: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'classifier', + maxStringLength: 13, + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'classifier' + ), + 
pathArray: [ColumnType.PARAMS, 'params.yaml', 'classifier'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'drop_cols', + maxStringLength: 17, + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml', 'drop_cols'), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'drop_cols'], + type: ColumnType.PARAMS, + types: ['array'] + }, + { + hasChildren: true, + label: 'dtypes', + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml', 'dtypes'), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'Age', + maxStringLength: 5, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'Age' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'Age'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'Embarked', + maxStringLength: 8, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'Embarked' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'Embarked'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'Fare', + maxStringLength: 5, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'Fare' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'Fare'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'Parch', + maxStringLength: 3, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + 
ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'Parch' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'Parch'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'Pclass', + maxStringLength: 8, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'Pclass' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'Pclass'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'Sex', + maxStringLength: 8, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'Sex' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'Sex'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'SibSp', + maxStringLength: 3, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'SibSp' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'SibSp'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'Survived', + maxStringLength: 8, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'dtypes', + 'Survived' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'dtypes', 'Survived'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: true, + label: 'feature_eng', + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'feature_eng' + ), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'featurize', + maxStringLength: 
4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'feature_eng' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'feature_eng', + 'featurize' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'feature_eng', 'featurize'], + type: ColumnType.PARAMS, + types: ['boolean'] + }, + { + hasChildren: true, + label: 'imputation', + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'imputation' + ), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'Age', + maxStringLength: 7, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'imputation' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'imputation', + 'Age' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'imputation', 'Age'], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 29.6991, + minNumber: 29.6991 + }, + { + hasChildren: false, + label: 'Fare', + maxStringLength: 7, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'imputation' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'imputation', + 'Fare' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'imputation', 'Fare'], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 32.2042, + minNumber: 32.2042 + }, + { + hasChildren: false, + label: 'method', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'imputation' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'imputation', + 'method' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'imputation', 'method'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: true, + label: 'model_params', + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 
'params.yaml', + 'model_params' + ), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'logistic_regression', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'logistic_regression' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'logistic_regression' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: false, + label: 'naive_bayes', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'naive_bayes' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'naive_bayes' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: false, + label: 'neural_network', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'neural_network' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'neural_network' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: true, + label: 'random_forest', + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest' + ), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'criterion', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'criterion' + ), + pathArray: [ + 
ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'criterion' + ], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'max_depth', + maxStringLength: 2, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'max_depth' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'max_depth' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 15, + minNumber: 15 + }, + { + hasChildren: false, + label: 'max_features', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'max_features' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'max_features' + ], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'min_samples_leaf', + maxStringLength: 1, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'min_samples_leaf' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'min_samples_leaf' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 6, + minNumber: 6 + }, + { + hasChildren: false, + label: 'min_samples_split', + maxStringLength: 1, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'min_samples_split' + ), + 
pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'min_samples_split' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 9, + minNumber: 9 + }, + { + hasChildren: false, + label: 'n_estimators', + maxStringLength: 3, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'n_estimators' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'random_forest', + 'n_estimators' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 460, + minNumber: 460 + }, + { + hasChildren: false, + label: 'support_vector_machine', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'support_vector_machine' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'support_vector_machine' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: false, + label: 'xgboost', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'model_params', + 'xgboost' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'model_params', 'xgboost'], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: false, + label: 'normalize', + maxStringLength: 4, + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml', 'normalize'), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'normalize'], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: true, + label: 'param_tuning', + parentPath: 
buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'logistic_regression', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'logistic_regression' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'logistic_regression' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: false, + label: 'naive_bayes', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'naive_bayes' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'naive_bayes' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: false, + label: 'neural_network', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'neural_network' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'neural_network' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: false, + label: 'num_eval', + maxStringLength: 3, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'num_eval' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'param_tuning', 'num_eval'], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 100, + minNumber: 100 + }, + { + hasChildren: true, + label: 'random_forest', + parentPath: 
buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest' + ), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'criterion', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'criterion' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'criterion' + ], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'max_depth', + maxStringLength: 2, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'max_depth' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'max_depth' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 15, + minNumber: 15 + }, + { + hasChildren: false, + label: 'max_features', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'max_features' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'max_features' + ], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'min_samples_leaf', + maxStringLength: 1, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 
'min_samples_leaf' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'min_samples_leaf' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 6, + minNumber: 6 + }, + { + hasChildren: false, + label: 'min_samples_split', + maxStringLength: 1, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'min_samples_split' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'min_samples_split' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 9, + minNumber: 9 + }, + { + hasChildren: false, + label: 'n_estimators', + maxStringLength: 3, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'n_estimators' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'random_forest', + 'n_estimators' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 460, + minNumber: 460 + }, + { + hasChildren: false, + label: 'scoring', + maxStringLength: 8, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'scoring' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'param_tuning', 'scoring'], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: false, + label: 'support_vector_machine', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'support_vector_machine' + ), + pathArray: [ + 
ColumnType.PARAMS, + 'params.yaml', + 'param_tuning', + 'support_vector_machine' + ], + type: ColumnType.PARAMS, + types: ['null'] + }, + { + hasChildren: true, + label: 'predict', + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml', 'predict'), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'js_estimator', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'predict' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'predict', + 'js_estimator' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'predict', 'js_estimator'], + type: ColumnType.PARAMS, + types: ['boolean'] + }, + { + hasChildren: false, + label: 'random_seed', + maxStringLength: 5, + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'random_seed' + ), + pathArray: [ColumnType.PARAMS, 'params.yaml', 'random_seed'], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 12345, + minNumber: 12345 + }, + { + hasChildren: true, + label: 'train_test_split', + parentPath: buildMetricOrParamPath(ColumnType.PARAMS, 'params.yaml'), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split' + ), + type: ColumnType.PARAMS + }, + { + hasChildren: false, + label: 'n_split', + maxStringLength: 2, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split', + 'n_split' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split', + 'n_split' + ], + type: ColumnType.PARAMS, + types: ['number'], + maxNumber: 10, + minNumber: 10 + }, + { + hasChildren: false, + label: 'shuffle', + maxStringLength: 4, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 
'params.yaml', + 'train_test_split' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split', + 'shuffle' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split', + 'shuffle' + ], + type: ColumnType.PARAMS, + types: ['boolean'] + }, + { + hasChildren: false, + label: 'target_class', + maxStringLength: 8, + parentPath: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split' + ), + path: buildMetricOrParamPath( + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split', + 'target_class' + ), + pathArray: [ + ColumnType.PARAMS, + 'params.yaml', + 'train_test_split', + 'target_class' + ], + type: ColumnType.PARAMS, + types: ['string'] + }, + { + hasChildren: true, + label: 'src', + parentPath: ColumnType.DEPS, + path: buildDepPath('src'), + type: ColumnType.DEPS + }, + { + hasChildren: true, + label: 'data', + parentPath: buildDepPath('src'), + path: buildDepPath('src', 'data'), + type: ColumnType.DEPS + }, + { + hasChildren: false, + label: 'make_dataset.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'data'), + path: buildDepPath('src', 'data', 'make_dataset.py'), + pathArray: [ColumnType.DEPS, join('src', 'data', 'make_dataset.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: true, + label: 'data', + parentPath: ColumnType.DEPS, + path: buildDepPath('data'), + type: ColumnType.DEPS + }, + { + hasChildren: true, + label: 'raw', + parentPath: buildDepPath('data'), + path: buildDepPath('data', 'raw'), + type: ColumnType.DEPS + }, + { + hasChildren: false, + label: 'test.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'raw'), + path: buildDepPath('data', 'raw', 'test.csv'), + pathArray: [ColumnType.DEPS, join('data', 'raw', 'test.csv')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'train.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'raw'), + path: buildDepPath('data', 
'raw', 'train.csv'), + pathArray: [ColumnType.DEPS, join('data', 'raw', 'train.csv')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'encode_labels.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'data'), + path: buildDepPath('src', 'data', 'encode_labels.py'), + pathArray: [ColumnType.DEPS, join('src', 'data', 'encode_labels.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: true, + label: 'interim', + parentPath: buildDepPath('data'), + path: buildDepPath('data', 'interim'), + type: ColumnType.DEPS + }, + { + hasChildren: false, + label: 'test_categorized.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'interim'), + path: buildDepPath('data', 'interim', 'test_categorized.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'interim', 'test_categorized.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'train_categorized.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'interim'), + path: buildDepPath('data', 'interim', 'train_categorized.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'interim', 'train_categorized.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'replace_nan.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'data'), + path: buildDepPath('src', 'data', 'replace_nan.py'), + pathArray: [ColumnType.DEPS, join('src', 'data', 'replace_nan.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'test_nan_imputed.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'interim'), + path: buildDepPath('data', 'interim', 'test_nan_imputed.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'interim', 'test_nan_imputed.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'train_nan_imputed.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 
'interim'), + path: buildDepPath('data', 'interim', 'train_nan_imputed.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'interim', 'train_nan_imputed.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: true, + label: 'features', + parentPath: buildDepPath('src'), + path: buildDepPath('src', 'features'), + type: ColumnType.DEPS + }, + { + hasChildren: false, + label: 'build_features.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'features'), + path: buildDepPath('src', 'features', 'build_features.py'), + pathArray: [ColumnType.DEPS, join('src', 'features', 'build_features.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'test_featurized.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'interim'), + path: buildDepPath('data', 'interim', 'test_featurized.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'interim', 'test_featurized.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'train_featurized.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'interim'), + path: buildDepPath('data', 'interim', 'train_featurized.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'interim', 'train_featurized.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'normalize.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'features'), + path: buildDepPath('src', 'features', 'normalize.py'), + pathArray: [ColumnType.DEPS, join('src', 'features', 'normalize.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: true, + label: 'processed', + parentPath: buildDepPath('data'), + path: buildDepPath('data', 'processed'), + type: ColumnType.DEPS + }, + { + hasChildren: false, + label: 'train_processed.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'processed'), + path: buildDepPath('data', 'processed', 'train_processed.csv'), + 
pathArray: [ + ColumnType.DEPS, + join('data', 'processed', 'train_processed.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'split_train_dev.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'data'), + path: buildDepPath('src', 'data', 'split_train_dev.py'), + pathArray: [ColumnType.DEPS, join('src', 'data', 'split_train_dev.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'split_train_dev.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'processed'), + path: buildDepPath('data', 'processed', 'split_train_dev.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'processed', 'split_train_dev.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: true, + label: 'models', + parentPath: buildDepPath('src'), + path: buildDepPath('src', 'models'), + type: ColumnType.DEPS + }, + { + hasChildren: false, + label: 'train_model.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'models'), + path: buildDepPath('src', 'models', 'train_model.py'), + pathArray: [ColumnType.DEPS, join('src', 'models', 'train_model.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'test_processed.csv', + maxStringLength: 7, + parentPath: buildDepPath('data', 'processed'), + path: buildDepPath('data', 'processed', 'test_processed.csv'), + pathArray: [ + ColumnType.DEPS, + join('data', 'processed', 'test_processed.csv') + ], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: true, + label: 'models', + parentPath: ColumnType.DEPS, + path: buildDepPath('models'), + type: ColumnType.DEPS + }, + { + hasChildren: false, + label: 'estimator.pkl', + maxStringLength: 7, + parentPath: buildDepPath('models'), + path: buildDepPath('models', 'estimator.pkl'), + pathArray: [ColumnType.DEPS, join('models', 'estimator.pkl')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + 
label: 'metrics.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'models'), + path: buildDepPath('src', 'models', 'metrics.py'), + pathArray: [ColumnType.DEPS, join('src', 'models', 'metrics.py')], + type: ColumnType.DEPS, + types: ['string'] + }, + { + hasChildren: false, + label: 'predict.py', + maxStringLength: 7, + parentPath: buildDepPath('src', 'models'), + path: buildDepPath('src', 'models', 'predict.py'), + pathArray: [ColumnType.DEPS, join('src', 'models', 'predict.py')], + type: ColumnType.DEPS, + types: ['string'] + } +] + +export default data diff --git a/extension/src/test/fixtures/expShow/survival/output.ts b/extension/src/test/fixtures/expShow/survival/output.ts new file mode 100644 index 0000000000..7b4b4287ff --- /dev/null +++ b/extension/src/test/fixtures/expShow/survival/output.ts @@ -0,0 +1,1232 @@ +import { + ExperimentsOutput, + ExperimentStatus +} from '../../../../cli/dvc/contract' +import { join } from '../../../util/path' + +// https://dagshub.com/kingabzpro/kaggle-titanic-dvc +const data: ExperimentsOutput = { + workspace: { + baseline: { + data: { + timestamp: null, + params: { + 'params.yaml': { + data: { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + 
max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 'accuracy', + support_vector_machine: null + }, + predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + hash: '4f66b01ce7fbc7219ddd5479027b4fce', + size: 1966, + nfiles: null + }, + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null + }, + [join('src', 'data', 'encode_labels.py')]: { + hash: '71c20e2dd8094132c4ca78915c4af31c', + size: 3311, + nfiles: null + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null + }, + [join('src', 'data', 'replace_nan.py')]: { + hash: 'a292443ade2893463ea8e78a2b2cb7fe', + size: 3527, + nfiles: null + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null + }, + [join('src', 'features', 'build_features.py')]: { + hash: '15a0db18893a1dea5b5c425bbc04e2fb', + size: 4716, + nfiles: null + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null + }, + [join('src', 'features', 'normalize.py')]: { + hash: '06e7d4f840c84d24a8892496db8f3e19', + size: 1610, + nfiles: null + }, + 
[join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null + }, + [join('src', 'data', 'split_train_dev.py')]: { + hash: '3ccd2f141aa14c9a0ea84e4fa6f461ca', + size: 2652, + nfiles: null + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null + }, + [join('src', 'models', 'train_model.py')]: { + hash: '3edee7d3e727ce6d3587557a3924eca3', + size: 4110, + nfiles: null + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null + }, + [join('src', 'models', 'metrics.py')]: { + hash: '71807a31d25c031180a695981df6fe9c', + size: 1742, + nfiles: null + }, + [join('src', 'models', 'predict.py')]: { + hash: 'ffcea00661810b8f82c86a6c39309253', + size: 2954, + nfiles: null + } + }, + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: '10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 
23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + metrics: { + [join('results', 'metrics.json')]: { + data: { + fit_time: 0.6337410688400269, + score_time: 
0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + } + } + } + } + }, + '3d5adcb974bb2c85917a5d61a489b933adaa2b7f': { + baseline: { + data: { + timestamp: '2021-07-16T19:54:42', + params: { + 'params.yaml': { + data: { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 'accuracy', + support_vector_machine: null + }, + predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + hash: '4f66b01ce7fbc7219ddd5479027b4fce', + size: 1966, + nfiles: null + }, + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null + }, + [join('src', 'data', 'encode_labels.py')]: { + 
hash: '71c20e2dd8094132c4ca78915c4af31c', + size: 3311, + nfiles: null + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null + }, + [join('src', 'data', 'replace_nan.py')]: { + hash: 'a292443ade2893463ea8e78a2b2cb7fe', + size: 3527, + nfiles: null + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null + }, + [join('src', 'features', 'build_features.py')]: { + hash: '15a0db18893a1dea5b5c425bbc04e2fb', + size: 4716, + nfiles: null + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null + }, + [join('src', 'features', 'normalize.py')]: { + hash: '06e7d4f840c84d24a8892496db8f3e19', + size: 1610, + nfiles: null + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null + }, + [join('src', 'data', 'split_train_dev.py')]: { + hash: '3ccd2f141aa14c9a0ea84e4fa6f461ca', + size: 2652, + nfiles: null + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null + }, + [join('src', 'models', 'train_model.py')]: { + hash: '3edee7d3e727ce6d3587557a3924eca3', + size: 4110, + nfiles: null + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', 
+ size: 31660351, + nfiles: null + }, + [join('src', 'models', 'metrics.py')]: { + hash: '71807a31d25c031180a695981df6fe9c', + size: 1742, + nfiles: null + }, + [join('src', 'models', 'predict.py')]: { + hash: 'ffcea00661810b8f82c86a6c39309253', + size: 2954, + nfiles: null + } + }, + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: '10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + 
use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + metrics: { + [join('results', 'metrics.json')]: { + data: { + fit_time: 0.6337410688400269, + score_time: 0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + } + }, + name: 'master' + } + } + }, + a49e03966a1f9f1299ec222ebc4bed8625d2c54d: { + baseline: { + data: { + timestamp: '2021-07-16T19:50:39', + params: { + 'params.yaml': { + data: { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + 
imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 'accuracy', + support_vector_machine: null + }, + predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + hash: '4f66b01ce7fbc7219ddd5479027b4fce', + size: 1966, + nfiles: null + }, + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null + }, + [join('src', 'data', 'encode_labels.py')]: { + hash: '71c20e2dd8094132c4ca78915c4af31c', + size: 3311, + nfiles: null + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null + }, + [join('src', 'data', 'replace_nan.py')]: { + hash: 'e1a2e28ebedd2c3c05d60e2d556d8970', + size: 2486, + nfiles: null + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + 
nfiles: null + }, + [join('src', 'features', 'build_features.py')]: { + hash: '15a0db18893a1dea5b5c425bbc04e2fb', + size: 4716, + nfiles: null + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null + }, + [join('src', 'features', 'normalize.py')]: { + hash: '06e7d4f840c84d24a8892496db8f3e19', + size: 1610, + nfiles: null + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null + }, + [join('src', 'data', 'split_train_dev.py')]: { + hash: '3ccd2f141aa14c9a0ea84e4fa6f461ca', + size: 2652, + nfiles: null + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null + }, + [join('src', 'models', 'train_model.py')]: { + hash: '3edee7d3e727ce6d3587557a3924eca3', + size: 4110, + nfiles: null + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null + }, + [join('src', 'models', 'metrics.py')]: { + hash: '71807a31d25c031180a695981df6fe9c', + size: 1742, + nfiles: null + }, + [join('src', 'models', 'predict.py')]: { + hash: 'ffcea00661810b8f82c86a6c39309253', + size: 2954, + nfiles: null + } + }, + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: 
'10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + 
nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + metrics: { + [join('results', 'metrics.json')]: { + data: { + fit_time: 0.6337410688400269, + score_time: 0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + } + } + } + } + }, + '4f7b50c3d171a11b6cfcd04416a16fc80b61018d': { + baseline: { + data: { + timestamp: '2021-07-16T19:48:45', + params: { + 'params.yaml': { + data: { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 'accuracy', + support_vector_machine: null + }, + 
predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + hash: '4f66b01ce7fbc7219ddd5479027b4fce', + size: 1966, + nfiles: null + }, + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null + }, + [join('src', 'data', 'encode_labels.py')]: { + hash: '71c20e2dd8094132c4ca78915c4af31c', + size: 3311, + nfiles: null + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null + }, + [join('src', 'data', 'replace_nan.py')]: { + hash: 'e1a2e28ebedd2c3c05d60e2d556d8970', + size: 2486, + nfiles: null + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null + }, + [join('src', 'features', 'build_features.py')]: { + hash: '15a0db18893a1dea5b5c425bbc04e2fb', + size: 4716, + nfiles: null + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null + }, + [join('src', 'features', 'normalize.py')]: { + hash: '06e7d4f840c84d24a8892496db8f3e19', + size: 1610, + nfiles: null + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null + }, + [join('src', 'data', 
'split_train_dev.py')]: { + hash: '3ccd2f141aa14c9a0ea84e4fa6f461ca', + size: 2652, + nfiles: null + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null + }, + [join('src', 'models', 'train_model.py')]: { + hash: '3edee7d3e727ce6d3587557a3924eca3', + size: 4110, + nfiles: null + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null + }, + [join('src', 'models', 'metrics.py')]: { + hash: '71807a31d25c031180a695981df6fe9c', + size: 1742, + nfiles: null + }, + [join('src', 'models', 'predict.py')]: { + hash: 'ffcea00661810b8f82c86a6c39309253', + size: 2954, + nfiles: null + } + }, + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: '10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 
'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + metrics: { + [join('results', 'metrics.json')]: { + data: { + fit_time: 0.6337410688400269, + score_time: 0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 
0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + } + } + } + } + } +} + +export default data diff --git a/extension/src/test/fixtures/expShow/survival/rows.ts b/extension/src/test/fixtures/expShow/survival/rows.ts new file mode 100644 index 0000000000..7d61d24a07 --- /dev/null +++ b/extension/src/test/fixtures/expShow/survival/rows.ts @@ -0,0 +1,1098 @@ +import { join } from '../../../util/path' +import { ExperimentStatus, Row } from '../../../../experiments/webview/contract' + +const data: Row[] = [ + { + id: 'workspace', + label: 'workspace', + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: '10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: 
null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + metrics: { + [join('results', 'metrics.json')]: { + fit_time: 0.6337410688400269, + score_time: 0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + }, + params: { + 'params.yaml': { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + 
dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 'accuracy', + support_vector_machine: null + }, + predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + changes: false, + value: '4f66b01' + }, + [join('data', 'raw', 'test.csv')]: { changes: false, value: '029c9cd' }, + [join('data', 'raw', 'train.csv')]: { changes: false, value: '61fdd54' }, + [join('src', 'data', 'encode_labels.py')]: { + changes: false, + value: '71c20e2' + }, + [join('data', 'interim', 'test_categorized.csv')]: { + changes: false, + value: 'f0fcdcd' + }, + [join('data', 'interim', 'train_categorized.csv')]: { + changes: false, + value: '5d06666' + }, + [join('src', 'data', 'replace_nan.py')]: { + changes: false, + value: 'a292443' + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + changes: false, + value: 'cbc3843' + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + changes: false, + value: '9edd042' + }, + [join('src', 'features', 'build_features.py')]: { + changes: false, + value: '15a0db1' + }, + [join('data', 'interim', 
'test_featurized.csv')]: { + changes: false, + value: '6879b36' + }, + [join('data', 'interim', 'train_featurized.csv')]: { + changes: false, + value: '980d370' + }, + [join('src', 'features', 'normalize.py')]: { + changes: false, + value: '06e7d4f' + }, + [join('data', 'processed', 'train_processed.csv')]: { + changes: false, + value: '55fc818' + }, + [join('src', 'data', 'split_train_dev.py')]: { + changes: false, + value: '3ccd2f1' + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + changes: false, + value: 'd4d2c31' + }, + [join('src', 'models', 'train_model.py')]: { + changes: false, + value: '3edee7d' + }, + [join('data', 'processed', 'test_processed.csv')]: { + changes: false, + value: '0cb34fc' + }, + [join('models', 'estimator.pkl')]: { changes: false, value: 'a97b560' }, + [join('src', 'models', 'metrics.py')]: { + changes: false, + value: '71807a3' + }, + [join('src', 'models', 'predict.py')]: { + changes: false, + value: 'ffcea00' + } + }, + displayColor: '#945dd6', + selected: true, + starred: false + }, + { + id: 'master', + label: 'master', + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: '10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 
'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + name: 'master', + sha: 
'3d5adcb974bb2c85917a5d61a489b933adaa2b7f', + Created: '2021-07-16T19:54:42', + metrics: { + [join('results', 'metrics.json')]: { + fit_time: 0.6337410688400269, + score_time: 0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + }, + params: { + 'params.yaml': { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 'accuracy', + support_vector_machine: null + }, + predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + changes: false, + value: '4f66b01' + }, + [join('data', 'raw', 'test.csv')]: { changes: false, value: '029c9cd' }, + [join('data', 'raw', 'train.csv')]: { changes: false, value: '61fdd54' }, + [join('src', 'data', 'encode_labels.py')]: { + changes: false, + value: '71c20e2' + }, + [join('data', 'interim', 
'test_categorized.csv')]: { + changes: false, + value: 'f0fcdcd' + }, + [join('data', 'interim', 'train_categorized.csv')]: { + changes: false, + value: '5d06666' + }, + [join('src', 'data', 'replace_nan.py')]: { + changes: false, + value: 'a292443' + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + changes: false, + value: 'cbc3843' + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + changes: false, + value: '9edd042' + }, + [join('src', 'features', 'build_features.py')]: { + changes: false, + value: '15a0db1' + }, + [join('data', 'interim', 'test_featurized.csv')]: { + changes: false, + value: '6879b36' + }, + [join('data', 'interim', 'train_featurized.csv')]: { + changes: false, + value: '980d370' + }, + [join('src', 'features', 'normalize.py')]: { + changes: false, + value: '06e7d4f' + }, + [join('data', 'processed', 'train_processed.csv')]: { + changes: false, + value: '55fc818' + }, + [join('src', 'data', 'split_train_dev.py')]: { + changes: false, + value: '3ccd2f1' + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + changes: false, + value: 'd4d2c31' + }, + [join('src', 'models', 'train_model.py')]: { + changes: false, + value: '3edee7d' + }, + [join('data', 'processed', 'test_processed.csv')]: { + changes: false, + value: '0cb34fc' + }, + [join('models', 'estimator.pkl')]: { changes: false, value: 'a97b560' }, + [join('src', 'models', 'metrics.py')]: { + changes: false, + value: '71807a3' + }, + [join('src', 'models', 'predict.py')]: { + changes: false, + value: 'ffcea00' + } + }, + displayColor: '#13adc7', + selected: true, + starred: false + }, + { + id: 'a49e03966a1f9f1299ec222ebc4bed8625d2c54d', + label: 'a49e039', + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + 
is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: '10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + 
[join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + sha: 'a49e03966a1f9f1299ec222ebc4bed8625d2c54d', + Created: '2021-07-16T19:50:39', + metrics: { + [join('results', 'metrics.json')]: { + fit_time: 0.6337410688400269, + score_time: 0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + }, + params: { + 'params.yaml': { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 
'accuracy', + support_vector_machine: null + }, + predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + changes: false, + value: '4f66b01' + }, + [join('data', 'raw', 'test.csv')]: { changes: false, value: '029c9cd' }, + [join('data', 'raw', 'train.csv')]: { changes: false, value: '61fdd54' }, + [join('src', 'data', 'encode_labels.py')]: { + changes: false, + value: '71c20e2' + }, + [join('data', 'interim', 'test_categorized.csv')]: { + changes: false, + value: 'f0fcdcd' + }, + [join('data', 'interim', 'train_categorized.csv')]: { + changes: false, + value: '5d06666' + }, + [join('src', 'data', 'replace_nan.py')]: { + changes: false, + value: 'e1a2e28' + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + changes: false, + value: 'cbc3843' + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + changes: false, + value: '9edd042' + }, + [join('src', 'features', 'build_features.py')]: { + changes: false, + value: '15a0db1' + }, + [join('data', 'interim', 'test_featurized.csv')]: { + changes: false, + value: '6879b36' + }, + [join('data', 'interim', 'train_featurized.csv')]: { + changes: false, + value: '980d370' + }, + [join('src', 'features', 'normalize.py')]: { + changes: false, + value: '06e7d4f' + }, + [join('data', 'processed', 'train_processed.csv')]: { + changes: false, + value: '55fc818' + }, + [join('src', 'data', 'split_train_dev.py')]: { + changes: false, + value: '3ccd2f1' + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + changes: false, + value: 'd4d2c31' + }, + [join('src', 'models', 'train_model.py')]: { + changes: false, + value: '3edee7d' + }, + [join('data', 'processed', 'test_processed.csv')]: { + changes: false, + value: '0cb34fc' + }, + [join('models', 'estimator.pkl')]: { changes: false, value: 'a97b560' }, + [join('src', 'models', 'metrics.py')]: { + changes: false, 
+ value: '71807a3' + }, + [join('src', 'models', 'predict.py')]: { + changes: false, + value: 'ffcea00' + } + }, + displayColor: '#f46837', + selected: true, + starred: false + }, + { + id: '4f7b50c3d171a11b6cfcd04416a16fc80b61018d', + label: '4f7b50c', + outs: { + [join('data', 'raw', 'test.csv')]: { + hash: '029c9cd22461f6dbe8d9ab01def965c6', + size: 28629, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'raw', 'train.csv')]: { + hash: '61fdd54abdbf6a85b778e937122e1194', + size: 61194, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'data_dictionary.tex')]: { + hash: '10c5361db59b330722bd70b83ce0fcee', + size: 1521, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('reports', 'figures', 'table_one.tex')]: { + hash: '4581508bdb37e12d9b9b5ff03244390d', + size: 844, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_categorized.csv')]: { + hash: 'f0fcdcd7bb08c23d382a665ac1436034', + size: 10788, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_categorized.csv')]: { + hash: '5d06666c95fed743140b44190fb67c77', + size: 23884, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + hash: 'cbc38434c407b0761da80a422ba97cff', + size: 11136, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + hash: '9edd0421f46d2f0786ea6d82fdcf4e12', + size: 24592, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'test_featurized.csv')]: { + hash: '6879b369c8d9f93c8ddeff61baea9ada', + size: 59474, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'interim', 'train_featurized.csv')]: { + hash: '980d370c7991c5b991bf8c47d13beb02', + size: 127169, + nfiles: null, + use_cache: true, + is_data_source: 
false + }, + [join('data', 'processed', 'test_processed.csv')]: { + hash: '0cb34fc53024fa12b32a098a32870612', + size: 59004, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'train_processed.csv')]: { + hash: '55fc818f9babfe04c7bd9a605e0f6240', + size: 126326, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + hash: 'd4d2c3159380a986fc2f04a8bcffda08', + size: 56115, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('models', 'estimator.pkl')]: { + hash: 'a97b560743390021fc662ba0496e6237', + size: 31660351, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_binary.csv')]: { + hash: '76577b506c3bc22a50d1aa61f3b940d0', + size: 2839, + nfiles: null, + use_cache: true, + is_data_source: false + }, + [join('results', 'test_predict_proba.csv')]: { + hash: '84ac915213a1b4f510486a0d049f39df', + size: 10087, + nfiles: null, + use_cache: true, + is_data_source: false + } + }, + status: ExperimentStatus.SUCCESS, + executor: null, + sha: '4f7b50c3d171a11b6cfcd04416a16fc80b61018d', + Created: '2021-07-16T19:48:45', + metrics: { + [join('results', 'metrics.json')]: { + fit_time: 0.6337410688400269, + score_time: 0.07778854370117187, + accuracy: 0.8293632958801498, + balanced_accuracy: 0.8040020654726536, + f1: 0.7572265847252886, + gmpr: 0.7615174102573903, + jaccard: 0.6113136909663465, + precision: 0.8361572183378356, + recall: 0.695546218487395, + roc_auc: 0.8703211951447246 + } + }, + params: { + 'params.yaml': { + classifier: 'random_forest', + drop_cols: ['Name', 'Cabin', 'Ticket'], + dtypes: { + Age: 'float', + Embarked: 'category', + Fare: 'float', + Parch: 'int', + Pclass: 'category', + Sex: 'category', + SibSp: 'int', + Survived: 'category' + }, + feature_eng: { featurize: true }, + imputation: { Age: 29.6991, Fare: 32.2042, method: 'mean' }, + model_params: { + logistic_regression: null, + 
naive_bayes: null, + neural_network: null, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + support_vector_machine: null, + xgboost: null + }, + normalize: null, + param_tuning: { + logistic_regression: null, + naive_bayes: null, + neural_network: null, + num_eval: 100, + random_forest: { + criterion: 'gini', + max_depth: 15, + max_features: 'auto', + min_samples_leaf: 6, + min_samples_split: 9, + n_estimators: 460 + }, + scoring: 'accuracy', + support_vector_machine: null + }, + predict: { js_estimator: true }, + random_seed: 12345, + train_test_split: { + n_split: 10, + shuffle: true, + target_class: 'Survived' + } + } + }, + deps: { + [join('src', 'data', 'make_dataset.py')]: { + changes: false, + value: '4f66b01' + }, + [join('data', 'raw', 'test.csv')]: { changes: false, value: '029c9cd' }, + [join('data', 'raw', 'train.csv')]: { changes: false, value: '61fdd54' }, + [join('src', 'data', 'encode_labels.py')]: { + changes: false, + value: '71c20e2' + }, + [join('data', 'interim', 'test_categorized.csv')]: { + changes: false, + value: 'f0fcdcd' + }, + [join('data', 'interim', 'train_categorized.csv')]: { + changes: false, + value: '5d06666' + }, + [join('src', 'data', 'replace_nan.py')]: { + changes: false, + value: 'e1a2e28' + }, + [join('data', 'interim', 'test_nan_imputed.csv')]: { + changes: false, + value: 'cbc3843' + }, + [join('data', 'interim', 'train_nan_imputed.csv')]: { + changes: false, + value: '9edd042' + }, + [join('src', 'features', 'build_features.py')]: { + changes: false, + value: '15a0db1' + }, + [join('data', 'interim', 'test_featurized.csv')]: { + changes: false, + value: '6879b36' + }, + [join('data', 'interim', 'train_featurized.csv')]: { + changes: false, + value: '980d370' + }, + [join('src', 'features', 'normalize.py')]: { + changes: false, + value: '06e7d4f' + }, + [join('data', 'processed', 'train_processed.csv')]: { + changes: 
false, + value: '55fc818' + }, + [join('src', 'data', 'split_train_dev.py')]: { + changes: false, + value: '3ccd2f1' + }, + [join('data', 'processed', 'split_train_dev.csv')]: { + changes: false, + value: 'd4d2c31' + }, + [join('src', 'models', 'train_model.py')]: { + changes: false, + value: '3edee7d' + }, + [join('data', 'processed', 'test_processed.csv')]: { + changes: false, + value: '0cb34fc' + }, + [join('models', 'estimator.pkl')]: { changes: false, value: 'a97b560' }, + [join('src', 'models', 'metrics.py')]: { + changes: false, + value: '71807a3' + }, + [join('src', 'models', 'predict.py')]: { + changes: false, + value: 'ffcea00' + } + }, + displayColor: '#48bb78', + selected: true, + starred: false + } +] + +export default data diff --git a/extension/src/test/fixtures/expShow/survival/tableData.ts b/extension/src/test/fixtures/expShow/survival/tableData.ts new file mode 100644 index 0000000000..7b0b2b427c --- /dev/null +++ b/extension/src/test/fixtures/expShow/survival/tableData.ts @@ -0,0 +1,19 @@ +import { TableData } from '../../../../experiments/webview/contract' +import rowsFixture from './rows' +import columnsFixture from './columns' + +const data: TableData = { + filteredCounts: { experiments: 0, checkpoints: 0 }, + rows: rowsFixture, + columns: columnsFixture, + filters: [], + hasCheckpoints: true, + hasRunningExperiment: true, + hasColumns: true, + sorts: [], + changes: [], + columnOrder: [], + columnWidths: {} +} + +export default data diff --git a/webview/src/stories/Table.stories.tsx b/webview/src/stories/Table.stories.tsx index 2f81adb8e8..2d20977fc3 100644 --- a/webview/src/stories/Table.stories.tsx +++ b/webview/src/stories/Table.stories.tsx @@ -7,6 +7,7 @@ import columnsFixture from 'dvc/src/test/fixtures/expShow/base/columns' import workspaceChangesFixture from 'dvc/src/test/fixtures/expShow/base/workspaceChanges' import deeplyNestedTableData from 'dvc/src/test/fixtures/expShow/deeplyNested/tableData' import dataTypesTableFixture from 
'dvc/src/test/fixtures/expShow/dataTypes/tableData' +import survivalTableData from 'dvc/src/test/fixtures/expShow/survival/tableData' import { timestampColumn } from 'dvc/src/experiments/columns/constants' import { ExperimentStatus, @@ -126,6 +127,11 @@ const Template: Story<{ tableData: TableDataState }> = ({ tableData }) => { export const WithData = Template.bind({}) +export const WithSurvivalData = Template.bind({}) +WithSurvivalData.args = { + tableData: { ...survivalTableData, hasData: true } +} + export const WithMiddleStates = Template.bind({}) const tableDataWithSomeSelectedExperiments = setExperimentsAsSelected( tableData,