You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm getting the error: "ValueError: The number of quantiles cannot be greater than the number of samples used. Got 12427 quantiles and 10000 samples."
Context:
My dataset is large, ~77,000 samples, but I have successfully run similar sized datasets with TabPFN in the past.
There are several columns with a small number of unique values (5-15), but I've tried turning off inference of categorical features via inference_config ("MIN_NUMBER_SAMPLES_FOR_CATEGORICAL_INFERENCE": 1000000, "MAX_UNIQUE_FOR_CATEGORICAL_FEATURES": 1, "MIN_UNIQUE_FOR_NUMERICAL_FEATURES": 1) and I still get the same error.
Full traceback:
Traceback (most recent call last):
File "/home/skaye/PycharmProjects/cellFormationModels/src/train_model.py", line 154, in <module>
main()
File "/home/skaye/PycharmProjects/cellFormationModels/src/train_model.py", line 83, in main
model, scores, predictions, roc_data = train_and_score_model(
^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/src/helpers/train_and_score_models.py", line 421, in train_and_score_model
model.fit(x_train, y_train)
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/regressor.py", line 503, in fit
self.executor_ = create_inference_engine(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/base.py", line 213, in create_inference_engine
engine = InferenceEngineCachePreprocessing.prepare(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/inference.py", line 265, in prepare
configs, preprocessors, X_trains, y_trains, cat_ixs = list(zip(*itr))
^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/preprocessing.py", line 625, in fit_preprocessing
yield from executor( # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1918, in __call__
return output if self.return_generator else list(output)
^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1847, in _get_sequential_output
res = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/preprocessing.py", line 538, in fit_preprocessing_one
res = preprocessor.fit_transform(X_train, cat_ix)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/model/preprocessing.py", line 397, in fit_transform
X, categorical_features = step.fit_transform(X, categorical_features)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/model/preprocessing.py", line 987, in fit_transform
Xt = transformer.fit_transform(X[:, self.subsampled_features])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 533, in fit_transform
Xt = self._fit(X, y, routed_params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 406, in _fit
X, fitted_transformer = fit_transform_one_cached(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/memory.py", line 312, in __call__
return self.func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 1310, in _fit_transform_one
res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/_set_output.py", line 313, in wrapped
data_to_wrap = f(self, X, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/compose/_column_transformer.py", line 976, in fit_transform
result = self._call_func_on_transformers(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/compose/_column_transformer.py", line 885, in _call_func_on_transformers
return Parallel(n_jobs=self.n_jobs)(jobs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/parallel.py", line 74, in __call__
return super().__call__(iterable_with_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1918, in __call__
return output if self.return_generator else list(output)
^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1847, in _get_sequential_output
res = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/parallel.py", line 136, in __call__
return self.function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 1310, in _fit_transform_one
res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/_set_output.py", line 313, in wrapped
data_to_wrap = f(self, X, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1098, in fit_transform
return self.fit(X, **fit_params).transform(X)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/preprocessing/_data.py", line 2775, in fit
raise ValueError(
ValueError: The number of quantiles cannot be greater than the number of samples used. Got 12427 quantiles and 10000 samples.
Steps/Code to Reproduce
No response
Expected Results
No response
Actual Results
No response
Versions
The text was updated successfully, but these errors were encountered:
Describe the bug
I'm getting the error: "ValueError: The number of quantiles cannot be greater than the number of samples used. Got 12427 quantiles and 10000 samples."
Context:
Full traceback:
Traceback (most recent call last):
File "/home/skaye/PycharmProjects/cellFormationModels/src/train_model.py", line 154, in <module>
main()
File "/home/skaye/PycharmProjects/cellFormationModels/src/train_model.py", line 83, in main
model, scores, predictions, roc_data = train_and_score_model(
^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/src/helpers/train_and_score_models.py", line 421, in train_and_score_model
model.fit(x_train, y_train)
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/regressor.py", line 503, in fit
self.executor_ = create_inference_engine(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/base.py", line 213, in create_inference_engine
engine = InferenceEngineCachePreprocessing.prepare(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/inference.py", line 265, in prepare
configs, preprocessors, X_trains, y_trains, cat_ixs = list(zip(*itr))
^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/preprocessing.py", line 625, in fit_preprocessing
yield from executor( # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1918, in __call__
return output if self.return_generator else list(output)
^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1847, in _get_sequential_output
res = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/preprocessing.py", line 538, in fit_preprocessing_one
res = preprocessor.fit_transform(X_train, cat_ix)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/model/preprocessing.py", line 397, in fit_transform
X, categorical_features = step.fit_transform(X, categorical_features)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/tabpfn/model/preprocessing.py", line 987, in fit_transform
Xt = transformer.fit_transform(X[:, self.subsampled_features])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 533, in fit_transform
Xt = self._fit(X, y, routed_params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 406, in _fit
X, fitted_transformer = fit_transform_one_cached(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/memory.py", line 312, in __call__
return self.func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 1310, in _fit_transform_one
res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/_set_output.py", line 313, in wrapped
data_to_wrap = f(self, X, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/compose/_column_transformer.py", line 976, in fit_transform
result = self._call_func_on_transformers(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/compose/_column_transformer.py", line 885, in _call_func_on_transformers
return Parallel(n_jobs=self.n_jobs)(jobs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/parallel.py", line 74, in __call__
return super().__call__(iterable_with_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1918, in __call__
return output if self.return_generator else list(output)
^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/joblib/parallel.py", line 1847, in _get_sequential_output
res = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/parallel.py", line 136, in __call__
return self.function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/pipeline.py", line 1310, in _fit_transform_one
res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/utils/_set_output.py", line 313, in wrapped
data_to_wrap = f(self, X, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1098, in fit_transform
return self.fit(X, **fit_params).transform(X)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/skaye/PycharmProjects/cellFormationModels/venv/lib/python3.12/site-packages/sklearn/preprocessing/_data.py", line 2775, in fit
raise ValueError(
ValueError: The number of quantiles cannot be greater than the number of samples used. Got 12427 quantiles and 10000 samples.
Steps/Code to Reproduce
No response
Expected Results
No response
Actual Results
No response
Versions
The text was updated successfully, but these errors were encountered: