AutoTabPFN giving an error: 'dict' object has no attribute 'name' #203

Open
RonghongJi opened this issue Feb 25, 2025 · 6 comments
Labels: bug (Something isn't working)

Comments

@RonghongJi

Describe the bug

from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNClassifier

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
clf = AutoTabPFNClassifier() # 120 seconds tuning time
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

This paper and its accompanying model are truly a remarkable leap forward for handling small tabular data.
I ran into a problem while using it: why do I get the following error when I run the sample code above, after installing tabpfn-extensions according to the instructions? I would really appreciate it if you could take a look.


AttributeError Traceback (most recent call last)
Cell In[3], line 10
8 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
9 clf = AutoTabPFNClassifier() # 120 seconds tuning time
---> 10 clf.fit(X_train, y_train)
11 predictions = clf.predict(X_test)

File ~/tabpfn-extensions/src/tabpfn_extensions/post_hoc_ensembles/sklearn_interface.py:112, in AutoTabPFNClassifier.fit(self, X, y, categorical_feature_indices)
97 task_type = (
98 TaskType.MULTICLASS if len(unique_labels(y)) > 2 else TaskType.BINARY
99 )
100 self.predictor_ = AutoPostHocEnsemblePredictor(
101 preset=self.preset,
102 task_type=task_type,
(...)
109 **self.phe_init_args_,
110 )
--> 112 self.predictor_.fit(
113 X,
114 y,
115 categorical_feature_indices=self.categorical_feature_indices,
116 )
118 # -- Sklearn required values
119 self.classes_ = self.predictor_.label_encoder.classes

File ~/tabpfn-extensions/src/tabpfn_extensions/post_hoc_ensembles/pfn_phe.py:333, in AutoPostHocEnsemblePredictor.fit(self, X, y, categorical_feature_indices)
316 self._estimators, model_family_per_estimator = self._collect_base_models(
317 categorical_feature_indices=categorical_feature_indices,
318 )
320 self._ens_model = self._ens_model(
321 estimators=self._estimators,
322 seed=self.ges_random_state,
(...)
330 model_family_per_estimator=model_family_per_estimator,
331 )
--> 333 self._ens_model.fit(X, y)
335 return self

File ~/tabpfn-extensions/src/tabpfn_extensions/post_hoc_ensembles/greedy_weighted_ensemble.py:234, in GreedyWeightedEnsemble.fit(self, X, y)
233 def fit(self, X, y):
--> 234 weights = self.get_weights(X, y)
236 final_weights = []
237 base_models = []

File ~/tabpfn-extensions/src/tabpfn_extensions/post_hoc_ensembles/greedy_weighted_ensemble.py:173, in GreedyWeightedEnsemble.get_weights(self, X, y)
172 def get_weights(self, X, y):
--> 173 oof_proba = self.get_oof_per_estimator(X, y)
174 self.model_family_per_estimator = (
175 self.model_family_per_estimator
176 if self.model_family_per_estimator is not None
177 else ["X"] * len(self._estimators)
178 )
179 self._model_family_per_estimator = self.model_family_per_estimator[
180 : len(self._estimators)
181 ]

File ~/tabpfn-extensions/src/tabpfn_extensions/post_hoc_ensembles/abstract_validation_utils.py:372, in AbstractValidationUtils.get_oof_per_estimator(self, X, y, return_loss_per_estimator, impute_dropped_instances, _extra_processing)
369 to_pass_holdout_index_hits = holdout_index_hits
370 holdout_index_hit_counts = current_repeat
--> 372 self._fill_predictions_in_place(
373 model_i=model_i,
374 base_model=base_model,
375 oof_proba_list=oof_proba_list,
376 X=X,
377 y=y,
378 train_index=train_index,
379 test_index=test_index,
380 loss_per_estimator=loss_per_estimator,
381 holdout_index_hits=to_pass_holdout_index_hits,
382 split_i=split_i,
383 _extra_processing=_extra_processing,
384 )
386 if check_for_repeat_early_stopping: # True after every repeat.
387 ran_repeats = current_repeat

File ~/tabpfn-extensions/src/tabpfn_extensions/post_hoc_ensembles/abstract_validation_utils.py:127, in AbstractValidationUtils._fill_predictions_in_place(self, model_i, base_model, oof_proba_list, X, y, train_index, test_index, loss_per_estimator, holdout_index_hits, _extra_processing, split_i)
123 fold_y_train, fold_y_test = y[train_index], y[test_index]
124 # base_model = copy.deepcopy(base_model) # FIXME: think about adding this for safety but will likely slow down (due to having to load model again)
125
126 # Default base models case
--> 127 base_model.fit(fold_X_train, fold_y_train)
129 pred = self._predict_oof(base_model, fold_X_test)
131 oof_proba_list[model_i][test_index] += pred

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/tabpfn/classifier.py:485, in TabPFNClassifier.fit(self, X, y)
482 assert len(ensemble_configs) == self.n_estimators
484 # Create the inference engine
--> 485 self.executor_ = create_inference_engine(
486 X_train=X,
487 y_train=y,
488 model=self.model_,
489 ensemble_configs=ensemble_configs,
490 cat_ix=self.inferred_categorical_indices_,
491 fit_mode=self.fit_mode,
492 device_=self.device_,
493 rng=rng,
494 n_jobs=self.n_jobs,
495 byte_size=byte_size,
496 forced_inference_dtype_=self.forced_inference_dtype_,
497 memory_saving_mode=self.memory_saving_mode,
498 use_autocast_=self.use_autocast_,
499 )
501 return self

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/tabpfn/base.py:213, in create_inference_engine(X_train, y_train, model, ensemble_configs, cat_ix, fit_mode, device_, rng, n_jobs, byte_size, forced_inference_dtype_, memory_saving_mode, use_autocast_)
200 engine = InferenceEngineOnDemand.prepare(
201 X_train=X_train,
202 y_train=y_train,
(...)
210 save_peak_mem=memory_saving_mode,
211 )
212 elif fit_mode == "fit_preprocessors":
--> 213 engine = InferenceEngineCachePreprocessing.prepare(
214 X_train=X_train,
215 y_train=y_train,
216 cat_ix=cat_ix,
217 ensemble_configs=ensemble_configs,
218 n_workers=n_jobs,
219 model=model,
220 rng=rng,
221 dtype_byte_size=byte_size,
222 force_inference_dtype=forced_inference_dtype_,
223 save_peak_mem=memory_saving_mode,
224 )
225 elif fit_mode == "fit_with_cache":
226 engine = InferenceEngineCacheKV.prepare(
227 X_train=X_train,
228 y_train=y_train,
(...)
238 autocast=use_autocast_,
239 )

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/tabpfn/inference.py:265, in InferenceEngineCachePreprocessing.prepare(cls, X_train, y_train, cat_ix, model, ensemble_configs, n_workers, rng, dtype_byte_size, force_inference_dtype, save_peak_mem)
239 """Prepare the inference engine.
240
241 Args:
(...)
254 The prepared inference engine.
255 """
256 itr = fit_preprocessing(
257 configs=ensemble_configs,
258 X_train=X_train,
(...)
263 parallel_mode="block",
264 )
--> 265 configs, preprocessors, X_trains, y_trains, cat_ixs = list(zip(*itr))
266 return InferenceEngineCachePreprocessing(
267 X_trains=X_trains,
268 y_trains=y_trains,
(...)
275 save_peak_mem=save_peak_mem,
276 )

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/tabpfn/preprocessing.py:625, in fit_preprocessing(configs, X_train, y_train, random_state, cat_ix, n_workers, parallel_mode)
622 worker_func = joblib.delayed(func)
624 seeds = rng.integers(0, np.iinfo(np.int32).max, len(configs))
--> 625 yield from executor( # type: ignore
626 [
627 worker_func(config, X_train, y_train, seed)
628 for config, seed in zip(configs, seeds)
629 ],
630 )

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/joblib/parallel.py:1918, in Parallel.call(self, iterable)
1916 output = self._get_sequential_output(iterable)
1917 next(output)
-> 1918 return output if self.return_generator else list(output)
1920 # Let's create an ID that uniquely identifies the current call. If the
1921 # call is interrupted early and that the same instance is immediately
1922 # re-used, this id will be used to prevent workers that were
1923 # concurrently finalizing a task from the previous call to run the
1924 # callback.
1925 with self._lock:

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/joblib/parallel.py:1847, in Parallel._get_sequential_output(self, iterable)
1845 self.n_dispatched_batches += 1
1846 self.n_dispatched_tasks += 1
-> 1847 res = func(*args, **kwargs)
1848 self.n_completed_tasks += 1
1849 self.print_progress()

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/tabpfn/preprocessing.py:537, in fit_preprocessing_one(config, X_train, y_train, random_state, cat_ix)
534 X_train = X_train.copy()
535 y_train = y_train.copy()
--> 537 preprocessor = config.to_pipeline(random_state=static_seed)
538 res = preprocessor.fit_transform(X_train, cat_ix)
540 # TODO(eddiebergman): Not a fan of this, wish it was more transparent, but we want
541 # to distuinguish what to do with the ys based on the ensemble config type

File /opt/anaconda3/envs/tabpfn/lib/python3.13/site-packages/tabpfn/preprocessing.py:452, in EnsembleConfig.to_pipeline(self, random_state)
440 if use_poly_features:
441 steps.append(
442 NanHandlingPolynomialFeaturesStep(
443 max_features=max_poly_features,
444 random_state=random_state,
445 ),
446 )
448 steps.extend(
449 [
450 RemoveConstantFeaturesStep(),
451 ReshapeFeatureDistributionsStep(
--> 452 transform_name=self.preprocess_config.name,
453 append_to_original=self.preprocess_config.append_original,
454 subsample_features=self.preprocess_config.subsample_features,
455 global_transformer_name=self.preprocess_config.global_transformer_name,
456 apply_to_categorical=(
457 self.preprocess_config.categorical_name == "numeric"
458 ),
459 random_state=random_state,
460 ),
461 EncodeCategoricalFeaturesStep(
462 self.preprocess_config.categorical_name,
463 random_state=random_state,
464 ),
465 ],
466 )
468 if self.add_fingerprint_feature:
469 steps.append(AddFingerprintFeaturesStep(random_state=random_state))

AttributeError: 'dict' object has no attribute 'name'

Steps/Code to Reproduce

No response

Expected Results

No response

Actual Results

No response

Versions

RonghongJi added the bug (Something isn't working) label on Feb 25, 2025
@noahho
Collaborator

noahho commented Feb 25, 2025

Thanks a lot for your kind words. I believe this is due to an outdated tabpfn version. Please check the newest version is installed: "pip install tabpfn --upgrade"
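For reference, one way to confirm which tabpfn version the notebook environment actually imports is the sketch below (minimal and assumption-laden: the __version__ attribute is an assumption about the package, while importlib.metadata is standard library):

import tabpfn
from importlib.metadata import version

# Report what the active environment actually imports; a stale Jupyter kernel or a
# second environment on PATH can explain an "upgrade" that does not take effect.
print(getattr(tabpfn, "__version__", "unknown"))  # __version__ is assumed to exist
print(version("tabpfn"))                          # installed distribution metadata
print(tabpfn.__file__)                            # where the package is loaded from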

@lincj1994

Thanks a lot for your kind words. I believe this is due to an outdated tabpfn version. Please check the newest version is installed: "pip install tabpfn --upgrade"

Hello, same issue persists even after upgrading TabPFN via pip install tabpfn --upgrade. Could you please provide further guidance?

@RonghongJi
Author

Thanks a lot for your kind words. I believe this is due to an outdated tabpfn version. Please check the newest version is installed: "pip install tabpfn --upgrade"

Hello, same issue persists even after upgrading TabPFN via pip install tabpfn --upgrade. Could you please provide further guidance?

I couldn't solve the problem this way either, so maybe it isn't a version problem.

@mert-kurttutan
Contributor

@RonghongJi Can you show the output of running pip list?

@RonghongJi
Author

@RonghongJi Can you show the output of running pip list?

(tabpfn) rhji@RonadeMacBook-Pro ~ % pip list
Package Version Editable project location


appnope 0.1.3
asttokens 2.0.5
Bottleneck 1.4.2
certifi 2025.1.31
charset-normalizer 3.4.1
cloudpickle 3.1.1
colour 0.1.5
comm 0.2.1
contourpy 1.3.1
cycler 0.12.1
debugpy 1.8.11
decorator 5.1.1
einops 0.8.1
et_xmlfile 1.1.0
executing 0.8.3
filelock 3.17.0
fonttools 4.56.0
fsspec 2025.2.0
future 1.0.0
huggingface-hub 0.29.1
hyperopt 0.2.7
idna 3.10
ipykernel 6.29.5
ipython 8.30.0
jedi 0.19.2
Jinja2 3.1.5
joblib 1.4.2
jupyter_client 8.6.3
jupyter_core 5.7.2
kditransform 0.2.0
kiwisolver 1.4.8
llvmlite 0.44.0
MarkupSafe 3.0.2
matplotlib 3.10.0
matplotlib-inline 0.1.6
mpmath 1.3.0
nest_asyncio 1.6.0
networkx 3.4.2
numba 0.61.0
numexpr 2.10.1
numpy 2.1.3
openpyxl 3.1.5
packaging 24.2
pandas 2.2.3
parso 0.8.4
pexpect 4.8.0
pillow 11.1.0
pip 25.0
platformdirs 3.10.0
prompt_toolkit 3.0.43
psutil 5.9.0
ptyprocess 0.7.0
pure-eval 0.2.2
py4j 0.10.9.9
Pygments 2.15.1
pyparsing 3.2.1
python-dateutil 2.9.0.post0
pytz 2024.1
PyYAML 6.0.2
pyzmq 26.2.0
requests 2.32.3
scikit-learn 1.6.1
scipy 1.15.2
seaborn 0.12.2
setuptools 72.1.0
shap 0.46.0
shapiq 1.2.1
six 1.16.0
slicer 0.0.8
stack-data 0.2.0
sympy 1.13.1
tabpfn 2.0.1
tabpfn-extensions 0.0.4 /Users/rhji/tabpfn-extensions
threadpoolctl 3.5.0
torch 2.6.0
tornado 6.4.2
tqdm 4.67.1
traitlets 5.14.3
typing_extensions 4.12.2
tzdata 2023.3
urllib3 2.3.0
wcwidth 0.2.5
wheel 0.45.1

@mert-kurttutan
Contributor

mert-kurttutan commented Mar 2, 2025

@RonghongJi Indeed, something in your environment setup is preventing you from updating[1] to the latest version, which is 2.0.6.

I suggest you run the following:

pip install tabpfn==2.0.6

and see if your code then runs successfully.

[1]: This might be due to version constraints in a requirements.txt or pyproject.toml file in your local setup.
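
If the pin still does not take effect, it can help to see whether another installed package (for example the local, editable tabpfn-extensions checkout) declares a constraint on tabpfn. Below is a minimal sketch using only the standard library; it is a generic diagnostic, not something from the maintainers' instructions:

from importlib.metadata import distributions

# Print every installed distribution whose declared requirements mention tabpfn,
# to spot a dependency that pins it to an older release.
for dist in distributions():
    for req in (dist.requires or []):
        if "tabpfn" in req.lower():
            print(f"{dist.metadata['Name']} requires: {req}")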
