Encountered an internal AutoML error. Error Message/Code: Expected argument dataset_json to have a valid value #15849

amitca71 · 2020-12-17T11:03:45Z

Package Name: azureml.core
Package Version: 1.19
Operating System: Linux driver 4.15.0-1098-azure #109~16.04.1-Ubuntu SMP Wed Sep 30 18:53:14 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
Python Version: 3.6.9 :: Anaconda, Inc

Describe the bug
A clear and concise description of what the bug is.
When I try to execute with automl_config, I get the exception shown below. If executing with a regular pipeline, it's not working.
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep
train_ds =train_dataset.parse_parquet_files()
automl_settings = {
"iteration_timeout_minutes": 10,
"experiment_timeout_hours": 0.25,
"n_cross_validations": 3,
"primary_metric": 'normalized_mean_absolute_error',
"max_concurrent_iterations": 3,
"max_cores_per_iteration": -1,
"verbosity": logging.INFO,
"enable_early_stopping": True
}

automl_config = AutoMLConfig(task = 'regression',
debug_log = 'automl_errors.log',
path = ".",
compute_target=compute_target,
training_data = train_ds,
label_column_name = target_column_name,
**automl_settings
)
from azureml.pipeline.core import PipelineData, TrainingOutput

metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

metrics_data = PipelineData(name='metrics_data',
datastore=datastore,
pipeline_output_name=metrics_output_name,
training_output=TrainingOutput(type='Metrics'))
model_data = PipelineData(name='model_data',
datastore=datastore,
pipeline_output_name=best_model_output_name,
training_output=TrainingOutput(type='Model'))
automl_step = AutoMLStep(
name='automl_module',
automl_config=automl_config,
outputs=[metrics_data, model_data],
allow_reuse=False)
training_pipeline = Pipeline(
description="training_pipeline",
workspace=ws,
steps=[automl_step])
training_pipeline_run = experiment.submit(training_pipeline)

exception when:
from azureml.core.experiment import Experiment
experiment=Experiment(ws, 'automl_remote')
remote_run = experiment.submit(automl_config, show_output=True)

To Reproduce
Steps to reproduce the behavior:

see above

Expected behavior
A clear and concise description of what you expected to happen.
The submission should not fail with an exception.
Screenshots
If applicable, add screenshots to help explain your problem.
Running on remote.
No run_configuration provided, running on cont-cluster with default configuration
Running on remote compute: cont-cluster

ValidationException Traceback (most recent call last)
in
1 from azureml.core.experiment import Experiment
2 experiment=Experiment(ws, 'automl_remote')
----> 3 remote_run = experiment.submit(automl_config, show_output=True)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_jupyter_common/__init__.py in submit(self, config, tags, **kwargs)
84 def _experiment_submit_notebook_decorator(original_submit):
85 def submit(self, config, tags=None, **kwargs):
---> 86 run = original_submit(self, config, tags, **kwargs)
87 _update_run_created_from(run)
88 return run

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/experiment.py in submit(self, config, tags, **kwargs)
218 submit_func = get_experiment_submit(config)
219 with self._log_context("submit config {}".format(config.__class__.__name__)):
--> 220 run = submit_func(config, self.workspace, self.name, **kwargs)
221 if tags is not None:
222 run.set_tags(tags)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py in _automl_static_submit(automl_config_object, workspace, experiment_name, **kwargs)
98 compute_target,
99 parent_run_id,
--> 100 show_output)
101
102 automl_run.add_properties(global_tracking_info_registry.gather_all(settings.path))

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py in _start_execution(experiment, settings_obj, fit_params, run_config, compute_target, parent_run_id, show_output)
209 if settings_obj.scenario == constants.Scenarios._NON_PROD:
210 validate_non_prod_env_exists(experiment.workspace)
--> 211 automl_run = _default_execution(experiment, settings_obj, fit_params, False, show_output)
212
213 return automl_run

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py in _default_execution(experiment, settings_obj, fit_params, legacy_local, show_output, parent_run_id)
122 automl_estimator = _azureautomlclient.AzureAutoMLClient(experiment, settings_obj)
123
--> 124 return automl_estimator.fit(**fit_params)
125
126

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in fit(self, run_configuration, compute_target, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, show_output, existing_run, training_data, validation_data, test_data, _script_run, parent_run_id, is_managed, kwargs)
407 cv_splits_indices=cv_splits_indices, show_output=show_output,
408 training_data=training_data, validation_data=validation_data,
--> 409 test_data=test_data)
410 except Exception as e:
411 self._fail_parent_run(error_details=e, is_aml_compute=run_configuration.target != 'local')

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _fit_remote(self, run_configuration, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, show_output, training_data, validation_data, test_data)
421 y_valid=y_valid, sample_weight_valid=sample_weight_valid,
422 cv_splits_indices=cv_splits_indices, training_data=training_data,
--> 423 validation_data=validation_data, test_data=test_data)
424
425 if show_output:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _fit_remote_core(self, run_configuration, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, training_data, validation_data, test_data)
484 run_config_object, X=X, y=y, sample_weight=sample_weight, X_valid=X_valid, y_valid=y_valid,
485 sample_weight_valid=sample_weight_valid, cv_splits_indices=cv_splits_indices,
--> 486 training_data=training_data, validation_data=validation_data, test_data=test_data)
487
488 try:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _create_parent_run_for_remote(self, run_config_object, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, training_data, validation_data, test_data)
535 sample_weight_valid=sample_weight_valid,
536 cv_splits_indices=cv_splits_indices,
--> 537 test_data=test_data
538 )
539

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _create_and_validate_parent_run_dto(self, target, training_data, validation_data, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, parent_run_id, test_data)
600 get_datasets_json(training_data=training_data,
601 validation_data=validation_data,
--> 602 test_data=test_data)
603 else:
604 dataprep_json = dataprep_utilities.get_dataprep_json(X=X, y=y,

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/dataset_utilities.py in get_datasets_json(training_data, validation_data, test_data)
130
131 # We must always be able to JSON-ify Datasets
--> 132 Contract.assert_value(dataset_json, "dataset_json")
133
134 return dataset_json

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/shared/_diagnostics/contract.py in assert_value(value, name, reference_code, log_safe)
63
64 Contract.assert_true(value is not None, message=error_details,
---> 65 target=name, reference_code=reference_code, log_safe=log_safe)
66
67 @staticmethod

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/shared/_diagnostics/contract.py in assert_true(condition, message, target, reference_code, log_safe)
42
43 raise ValidationException._with_error(AzureMLError.create(
---> 44 AutoMLInternal, target=target, reference_code=reference_code, error_details=message)
45 )
46

ValidationException: ValidationException:
Message: Encountered an internal AutoML error. Error Message/Code: Expected argument dataset_json to have a valid value.
InnerException: None
ErrorResponse
{
"error": {
"code": "SystemError",
"message": "Encountered an internal AutoML error. Error Message/Code: Expected argument dataset_json to have a valid value.",
"details_uri": "https://docs.microsoft.com/azure/machine-learning/resource-known-issues#automated-machine-learning",
"target": "dataset_json",
"inner_error": {
"code": "ClientError",
"inner_error": {
"code": "AutoMLInternal"
}
}
}
}
Additional context
Add any other context about the problem here.

ghost · 2021-02-08T23:49:13Z

Thanks for the feedback! We are routing this to the appropriate team for follow-up. cc @azureml-github.

Issue Details

Package Name: azureml.core
Package Version: 1.19
Operating System: Linux driver 4.15.0-1098-azure #109~16.04.1-Ubuntu SMP Wed Sep 30 18:53:14 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
Python Version: 3.6.9 :: Anaconda, Inc

Describe the bug
A clear and concise description of what the bug is.
When I try to execute with automl_config, I get the exception shown below. If executing with a regular pipeline, it's not working.
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep
train_ds =train_dataset.parse_parquet_files()
automl_settings = {
"iteration_timeout_minutes": 10,
"experiment_timeout_hours": 0.25,
"n_cross_validations": 3,
"primary_metric": 'normalized_mean_absolute_error',
"max_concurrent_iterations": 3,
"max_cores_per_iteration": -1,
"verbosity": logging.INFO,
"enable_early_stopping": True
}

automl_config = AutoMLConfig(task = 'regression',
debug_log = 'automl_errors.log',
path = ".",
compute_target=compute_target,
training_data = train_ds,
label_column_name = target_column_name,
**automl_settings
)
from azureml.pipeline.core import PipelineData, TrainingOutput

metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

metrics_data = PipelineData(name='metrics_data',
datastore=datastore,
pipeline_output_name=metrics_output_name,
training_output=TrainingOutput(type='Metrics'))
model_data = PipelineData(name='model_data',
datastore=datastore,
pipeline_output_name=best_model_output_name,
training_output=TrainingOutput(type='Model'))
automl_step = AutoMLStep(
name='automl_module',
automl_config=automl_config,
outputs=[metrics_data, model_data],
allow_reuse=False)
training_pipeline = Pipeline(
description="training_pipeline",
workspace=ws,
steps=[automl_step])
training_pipeline_run = experiment.submit(training_pipeline)

exception when:
from azureml.core.experiment import Experiment
experiment=Experiment(ws, 'automl_remote')
remote_run = experiment.submit(automl_config, show_output=True)

To Reproduce
Steps to reproduce the behavior:

see above