We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
sdv.evaluate call sometimes fails with a ValueError: Input contains infinity or a value too large for dtype('float64').
sdv.evaluate
ValueError: Input contains infinity or a value too large for dtype('float64').
The exception is raised in the pipeline fit step inside sdmetrics/detection/tabular/logistic.py.
fit
sdmetrics/detection/tabular/logistic.py
We should review if we can prevent this, or at least capture it and return a 0.
This is the full traceback:
------------------ from sdv.evaluation import evaluate evaluate(new_data, data) ------------------ --------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-1-349ebfb54984> in <module> 1 from sdv.evaluation import evaluate 2 ----> 3 evaluate(new_data, data) ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdv/evaluation.py in evaluate(synthetic_data, real_data, metadata, root_path, table_name, metrics, get_report, aggregate) 152 computed = {} 153 for metric in metrics: --> 154 computed[metric] = METRICS[metric](synth, real, metadata, details=get_report) 155 156 if get_report: ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdv/evaluation.py in _logistic_detection(synthetic, real, metadata, details) 98 99 def _logistic_detection(synthetic, real, metadata=None, details=False): --> 100 return _tabular_metric(LogisticDetector(), synthetic, real, metadata, details) 101 102 ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdv/evaluation.py in _tabular_metric(sdmetric, synthetic, real, metadata, details) 86 return list(metrics) 87 ---> 88 return np.mean([metric.value for metric in metrics]) 89 90 ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdv/evaluation.py in <listcomp>(.0) 86 return list(metrics) 87 ---> 88 return np.mean([metric.value for metric in metrics]) 89 90 ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdmetrics/detection/tabular/base.py in metrics(self, metadata, real_tables, synthetic_tables) 48 Metric: The next metric. 
49 """ ---> 50 yield from self._single_table_detection(metadata, real_tables, synthetic_tables) 51 yield from self._parent_child_detection(metadata, real_tables, synthetic_tables) 52 ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdmetrics/detection/tabular/base.py in _single_table_detection(self, metadata, real_tables, synthetic_tables) 57 auroc = self._compute_auroc( 58 real_tables[table_name][table_fields], ---> 59 synthetic_tables[table_name][table_fields]) 60 61 yield Metric( ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdmetrics/detection/tabular/base.py in _compute_auroc(self, real_table, synthetic_table) 123 kf = StratifiedKFold(n_splits=3, shuffle=True) 124 for train_index, test_index in kf.split(X, y): --> 125 self.fit(X[train_index], y[train_index]) 126 y_pred = self.predict_proba(X[test_index]) 127 auroc = roc_auc_score(y[test_index], y_pred) ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sdmetrics/detection/tabular/logistic.py in fit(self, X, y) 22 ('classifier', LogisticRegression(solver="lbfgs")), 23 ]) ---> 24 self.model.fit(X, y) 25 26 def predict_proba(self, X): ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params) 328 """ 329 fit_params_steps = self._check_fit_params(**fit_params) --> 330 Xt = self._fit(X, y, **fit_params_steps) 331 with _print_elapsed_time('Pipeline', 332 self._log_message(len(self.steps) - 1)): ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps) 294 message_clsname='Pipeline', 295 message=self._log_message(step_idx), --> 296 **fit_params_steps[name]) 297 # Replace the transformer of the step with the fitted 298 # transformer. 
This is necessary when loading the transformer ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/joblib/memory.py in __call__(self, *args, **kwargs) 350 351 def __call__(self, *args, **kwargs): --> 352 return self.func(*args, **kwargs) 353 354 def call_and_shelve(self, *args, **kwargs): ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params) 738 with _print_elapsed_time(message_clsname, message): 739 if hasattr(transformer, 'fit_transform'): --> 740 res = transformer.fit_transform(X, y, **fit_params) 741 else: 742 res = transformer.fit(X, y, **fit_params).transform(X) ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params) 691 else: 692 # fit method of arity 2 (supervised transformation) --> 693 return self.fit(X, y, **fit_params).transform(X) 694 695 ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/preprocessing/_data.py in fit(self, X, y) 1200 X = self._validate_data(X, accept_sparse='csc', estimator=self, 1201 dtype=FLOAT_DTYPES, -> 1202 force_all_finite='allow-nan') 1203 1204 q_min, q_max = self.quantile_range ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params) 418 f"requires y to be passed, but the target y is None." 
419 ) --> 420 X = check_array(X, **check_params) 421 out = X 422 else: ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs) 70 FutureWarning) 71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)}) ---> 72 return f(**kwargs) 73 return inner_f 74 ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator) 643 if force_all_finite: 644 _assert_all_finite(array, --> 645 allow_nan=force_all_finite == 'allow-nan') 646 647 if ensure_min_samples > 0: ~/work/SDV/SDV/.tox/py36/lib/python3.6/site-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype) 97 msg_err.format 98 (type_err, ---> 99 msg_dtype if msg_dtype is not None else X.dtype) 100 ) 101 # for object dtype data, we only check for NaNs (GH-13254) ValueError: Input contains infinity or a value too large for dtype('float64').
The text was updated successfully, but these errors were encountered:
csala
Successfully merging a pull request may close this issue.
sdv.evaluate call sometimes fails with a ValueError: Input contains infinity or a value too large for dtype('float64').
The exception is raised in the pipeline fit step inside sdmetrics/detection/tabular/logistic.py. We should review if we can prevent this, or at least capture it and return a 0.
This is the full traceback:
The text was updated successfully, but these errors were encountered: