Skip to content

Commit

Permalink
Reduced processing time
Browse files Browse the repository at this point in the history
  • Loading branch information
christopherbunn committed Jun 30, 2023
1 parent abcd38b commit 313a598
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 17 deletions.
17 changes: 16 additions & 1 deletion docs/source/start.ipynb
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Start"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -26,6 +28,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -42,6 +45,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -77,6 +81,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -95,13 +100,15 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"__Note:__ To provide data to EvalML, it is recommended that you initialize a woodwork accessor so that you control how EvalML will treat each feature: for example, as a numeric feature, a categorical feature, a text feature or another type of feature. Consult [the Woodwork project](https://woodwork.alteryx.com/en/stable/) for help on how to do this. Here, `split_data()` returns dataframes with woodwork accessors."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -133,6 +140,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -151,6 +159,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -170,20 +179,22 @@
" y_train=y_train,\n",
" problem_type=\"binary\",\n",
" objective=\"f1\",\n",
" max_batches=3,\n",
" max_batches=2,\n",
" verbose=False,\n",
")\n",
"automl.search()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"We also provide a standalone `search` [method](./autoapi/evalml/automl/index.rst#evalml.automl.search) which does all of the above in a single line, and returns the `AutoMLSearch` instance and data check results. If there were data check errors, AutoML will not be run and no `AutoMLSearch` instance will be returned."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -200,6 +211,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -216,6 +228,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -233,6 +246,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -250,6 +264,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down
20 changes: 10 additions & 10 deletions evalml/tests/automl_tests/test_automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1861,7 +1861,7 @@ def test_pipelines_in_batch_return_none(
X_train=X,
y_train=y,
problem_type="binary",
max_batches=3,
max_batches=2,
automl_algorithm="iterative",
allowed_component_graphs={"Name": [dummy_classifier_estimator_class]},
n_jobs=1,
Expand Down Expand Up @@ -2299,7 +2299,7 @@ def test_time_series_regression_with_parameters(ts_data):
allowed_component_graphs={"Name_0": ["Imputer", "Linear Regressor"]},
objective="auto",
problem_configuration=problem_configuration,
max_batches=3,
max_batches=2,
)
assert (
automl.automl_algorithm.search_parameters["pipeline"] == problem_configuration
Expand Down Expand Up @@ -2344,7 +2344,7 @@ def test_automl_accepts_component_graphs(graph_type, X_y_binary):
problem_type="binary",
allowed_component_graphs={"Dummy_Name": component_graph},
objective="auto",
max_batches=3,
max_batches=2,
)
for pipeline_ in automl.allowed_pipelines:
assert isinstance(pipeline_, BinaryClassificationPipeline)
Expand Down Expand Up @@ -4158,7 +4158,7 @@ def test_automl_drop_unknown_columns(columns, AutoMLTestEnv, X_y_binary, caplog)
y_train=y,
problem_type="binary",
optimize_thresholds=False,
max_batches=3,
max_batches=2,
verbose=True,
)
env = AutoMLTestEnv("binary")
Expand Down Expand Up @@ -4573,7 +4573,7 @@ def test_automl_passes_known_in_advance_pipeline_parameters_to_all_pipelines(
X_train=X,
y_train=y,
problem_type=problem_type,
max_batches=3,
max_batches=2,
problem_configuration={
"time_index": "date",
"max_delay": 3,
Expand Down Expand Up @@ -4624,7 +4624,7 @@ def test_cv_ranking_scores(
X_train=X,
y_train=y,
problem_type="binary",
max_batches=3,
max_batches=2,
data_splitter=data_splitter,
allowed_component_graphs={"Name": [dummy_classifier_estimator_class]},
n_jobs=1,
Expand Down Expand Up @@ -4818,7 +4818,7 @@ def test_automl_accepts_features(
y_train=y,
problem_type="binary",
optimize_thresholds=False,
max_batches=3,
max_batches=2,
features=features,
automl_algorithm=automl_algorithm,
)
Expand Down Expand Up @@ -4862,7 +4862,7 @@ def test_automl_with_empty_features_list(
y_train=y,
problem_type="binary",
optimize_thresholds=False,
max_batches=3,
max_batches=2,
features=[],
automl_algorithm=automl_algorithm,
)
Expand Down Expand Up @@ -5075,7 +5075,7 @@ def test_default_algorithm_uses_n_jobs(X_y_binary, AutoMLTestEnv):
X_train=X,
y_train=y,
problem_type="binary",
max_batches=3,
max_batches=2,
automl_algorithm="default",
n_jobs=2,
)
Expand Down Expand Up @@ -5525,7 +5525,7 @@ def test_holdout_set_results_and_rankings(caplog, AutoMLTestEnv):
X_train=X,
y_train=y,
problem_type="binary",
max_batches=3,
max_batches=2,
automl_algorithm="default",
verbose=True,
holdout_set_size=0.1,
Expand Down
10 changes: 5 additions & 5 deletions evalml/tests/automl_tests/test_automl_search_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ def test_automl_search_dictionary_undersampler(
optimize_thresholds=False,
sampler_method="Undersampler",
search_parameters=search_parameters,
max_batches=3,
max_batches=2,
)
# check that the sampling dict got set properly
automl.search()
Expand Down Expand Up @@ -1077,7 +1077,7 @@ def test_automl_search_dictionary_oversampler(
sampler_method="Oversampler",
optimize_thresholds=False,
search_parameters=search_parameters,
max_batches=3,
max_batches=2,
)
# check that the sampling dict got set properly
pipelines = automl.allowed_pipelines
Expand Down Expand Up @@ -1122,7 +1122,7 @@ def test_automl_search_sampler_dictionary_keys(
sampler_method=sampler,
optimize_thresholds=False,
search_parameters=search_parameters,
max_batches=3,
max_batches=2,
)
if errors:
with pytest.raises(
Expand Down Expand Up @@ -1254,7 +1254,7 @@ def test_automl_passes_allow_long_running_models(
objective="Log Loss Multiclass",
allow_long_running_models=allow_long_running_models,
automl_algorithm=algo,
max_batches=3,
max_batches=2,
verbose=True,
)
assert (
Expand All @@ -1280,7 +1280,7 @@ def test_automl_threshold_score(fraud_100):
X_train,
y_train,
problem_type="binary",
max_batches=3,
max_batches=2,
ensembling=True,
verbose=False,
automl_algorithm="default",
Expand Down
2 changes: 1 addition & 1 deletion evalml/tests/automl_tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_search_args(mock_automl_search, mock_data_checks_validate, X_y_binary):
assert automl.max_time == 42
assert automl.patience == 3
assert automl.tolerance == 0.5
assert automl.max_batches == 4
assert automl.max_batches == 3
assert isinstance(automl.automl_algorithm, DefaultAlgorithm)

automl, _ = search(
Expand Down

0 comments on commit 313a598

Please sign in to comment.