Skip to content

Commit

Permalink
fix: use numpy std with Bessel's correction and add test
Browse files: browse the repository at this point in the history
  • Loading branch information
eduardocarvp authored and Optimox committed Dec 27, 2021
1 parent 49bd61b commit 3adaf4c
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 79 deletions.
97 changes: 20 additions & 77 deletions pretraining_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -54,17 +54,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading file...\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"out.parent.mkdir(parents=True, exist_ok=True)\n",
"if out.exists():\n",
Expand All @@ -83,7 +75,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -108,31 +100,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"39 73\n",
" State-gov 9\n",
" Bachelors 16\n",
" 13 16\n",
" Never-married 7\n",
" Adm-clerical 15\n",
" Not-in-family 6\n",
" White 5\n",
" Male 2\n",
" 2174 119\n",
" 0 92\n",
" 40 94\n",
" United-States 42\n",
" <=50K 2\n",
"Set 3\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nunique = train.nunique()\n",
"types = train.dtypes\n",
Expand Down Expand Up @@ -160,7 +130,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -175,7 +145,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -198,7 +168,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -207,18 +177,9 @@
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/work/pytorch_tabnet/abstract_model.py:75: UserWarning: Device used : cpu\n",
" warnings.warn(f\"Device used : {self.device}\")\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TabNetPretrainer\n",
"unsupervised_model = TabNetPretrainer(\n",
Expand All @@ -242,38 +203,20 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"max_epochs = 2 if not os.getenv(\"CI\", False) else 2"
"max_epochs = 1000 if not os.getenv(\"CI\", False) else 2"
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 0 | loss: 6.48655 | val_0_unsup_loss_numpy: 2.0507700443267822| 0:00:07s\n",
"epoch 1 | loss: 1.61586 | val_0_unsup_loss_numpy: 1.2413300275802612| 0:00:15s\n",
"Stop training because you reached max_epochs = 2 with best_epoch = 1 and best_val_0_unsup_loss_numpy = 1.2413300275802612\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/work/pytorch_tabnet/callbacks.py:172: UserWarning: Best weights from best epoch are automatically used!\n",
" warnings.warn(wrn_msg)\n"
]
}
],
"outputs": [],
"source": [
"unsupervised_model.fit(\n",
" X_train=X_train,\n",
Expand Down
3 changes: 1 addition & 2 deletions pytorch_tabnet/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def UnsupervisedLoss(y_pred, embedded_x, obf_vars, eps=1e-9):
def UnsupervisedLossNumpy(y_pred, embedded_x, obf_vars, eps=1e-9):
errors = y_pred - embedded_x
reconstruction_errors = np.multiply(errors, obf_vars) ** 2
batch_stds = np.std(embedded_x, axis=0) ** 2 + eps
batch_stds = np.std(embedded_x, axis=0, ddof=1) ** 2 + eps
features_loss = np.matmul(reconstruction_errors, 1 / batch_stds)
# compute the number of obfuscated variables to reconstruct
nb_reconstructed_variables = np.sum(obf_vars, axis=1)
Expand All @@ -64,7 +64,6 @@ def UnsupervisedLossNumpy(y_pred, embedded_x, obf_vars, eps=1e-9):
return loss



@dataclass
class UnsupMetricContainer:
"""Container holding a list of metrics.
Expand Down
25 changes: 25 additions & 0 deletions tests/unsupervised_loss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import numpy as np
import torch
from pytorch_tabnet.metrics import UnsupervisedLoss, UnsupervisedLossNumpy

torch.set_printoptions(precision=10)


def test_equal_losses():
    """Check that the NumPy and PyTorch unsupervised losses agree.

    The same random predictions, embedded inputs and obfuscation mask
    (20 rows x 100 columns) are passed to ``UnsupervisedLossNumpy`` and to
    ``UnsupervisedLoss``; the two resulting losses must be equal within the
    default tolerances of ``np.isclose``.
    """
    # A locally seeded generator makes the inputs identical on every run, so
    # a failure is reproducible, and leaves NumPy's global random state alone.
    rng = np.random.default_rng(0)
    shape = (20, 100)

    y_pred = rng.uniform(low=-2, high=2, size=shape)
    embedded_x = rng.uniform(low=-2, high=2, size=shape)
    # Binary mask of 0/1 entries, drawn with replacement.
    obf_vars = rng.choice([0, 1], size=shape, replace=True)

    numpy_loss = UnsupervisedLossNumpy(
        y_pred=y_pred,
        embedded_x=embedded_x,
        obf_vars=obf_vars,
    )

    # Every tensor is built as float64 so the torch side is not computed in a
    # lower precision than the NumPy side.
    torch_loss = UnsupervisedLoss(
        y_pred=torch.tensor(y_pred, dtype=torch.float64),
        embedded_x=torch.tensor(embedded_x, dtype=torch.float64),
        obf_vars=torch.tensor(obf_vars, dtype=torch.float64),
    )

    assert np.isclose(numpy_loss, torch_loss.detach().numpy())

0 comments on commit 3adaf4c

Please sign in to comment.