Skip to content

Commit

Permalink
Add an epsilon to avoid numerical instability in PCA whiteneing
Browse files Browse the repository at this point in the history
Summary:
PCA whitening implies to multiply eigenvectors with 1/sqrt(singular values of convariance matrix). The singular values are sometimes 0 (because the vector subspace is not full-rank) or negative (because of numerical issues).
Therefore, this diff adds an epsilon to the denominator above (default 0).

Reviewed By: edpizzi

Differential Revision: D31725075

fbshipit-source-id: dae68bda9f7452220785d76e30ce4b2ac8582413
  • Loading branch information
mdouze authored and facebook-github-bot committed Oct 19, 2021
1 parent 26abede commit b598f55
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 7 deletions.
3 changes: 2 additions & 1 deletion faiss/VectorTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ PCAMatrix::PCAMatrix(
is_trained = false;
max_points_per_d = 1000;
balanced_bins = 0;
epsilon = 0;
}

namespace {
Expand Down Expand Up @@ -620,7 +621,7 @@ void PCAMatrix::prepare_Ab() {
if (eigen_power != 0) {
float* ai = A.data();
for (int i = 0; i < d_out; i++) {
float factor = pow(eigenvalues[i], eigen_power);
float factor = pow(eigenvalues[i] + epsilon, eigen_power);
for (int j = 0; j < d_in; j++)
*ai++ *= factor;
}
Expand Down
3 changes: 3 additions & 0 deletions faiss/VectorTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ struct PCAMatrix : LinearTransform {
*/
float eigen_power;

/// value added to eigenvalues to avoid division by 0 when whitening
float epsilon;

/// random rotation after PCA
bool random_rotation;

Expand Down
12 changes: 9 additions & 3 deletions faiss/impl/index_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,22 @@ VectorTransform* read_VectorTransform(IOReader* f) {
VectorTransform* vt = nullptr;

if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") ||
h == fourcc("PcAm") || h == fourcc("Viqm")) {
h == fourcc("PcAm") || h == fourcc("Viqm") || h == fourcc("Pcam")) {
LinearTransform* lt = nullptr;
if (h == fourcc("rrot")) {
lt = new RandomRotationMatrix();
} else if (h == fourcc("PCAm") || h == fourcc("PcAm")) {
} else if (
h == fourcc("PCAm") || h == fourcc("PcAm") ||
h == fourcc("Pcam")) {
PCAMatrix* pca = new PCAMatrix();
READ1(pca->eigen_power);
if (h == fourcc("Pcam")) {
READ1(pca->epsilon);
}
READ1(pca->random_rotation);
if (h == fourcc("PcAm"))
if (h != fourcc("PCAm")) {
READ1(pca->balanced_bins);
}
READVECTOR(pca->mean);
READVECTOR(pca->eigenvalues);
READVECTOR(pca->PCAMat);
Expand Down
3 changes: 2 additions & 1 deletion faiss/impl/index_write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ void write_VectorTransform(const VectorTransform* vt, IOWriter* f) {
uint32_t h = fourcc("rrot");
WRITE1(h);
} else if (const PCAMatrix* pca = dynamic_cast<const PCAMatrix*>(lt)) {
uint32_t h = fourcc("PcAm");
uint32_t h = fourcc("Pcam");
WRITE1(h);
WRITE1(pca->eigen_power);
WRITE1(pca->epsilon);
WRITE1(pca->random_rotation);
WRITE1(pca->balanced_bins);
WRITEVECTOR(pca->mean);
Expand Down
33 changes: 31 additions & 2 deletions tests/test_build_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@

import faiss
import unittest
import array

from common_faiss_tests import get_dataset_2



class TestPCA(unittest.TestCase):

def test_pca(self):
Expand All @@ -35,6 +33,37 @@ def test_pca(self):
self.assertGreater(prev, o)
prev = o

def test_pca_epsilon(self):
d = 64
n = 1000
np.random.seed(123)
x = np.random.random(size=(n, d)).astype('float32')

# make sure data is in a sub-space
x[:, ::2] = 0

# check division by 0 with default computation
pca = faiss.PCAMatrix(d, 60, -0.5)
pca.train(x)
y = pca.apply(x)
self.assertFalse(np.all(np.isfinite(y)))

# check add epsilon
pca = faiss.PCAMatrix(d, 60, -0.5)
pca.epsilon = 1e-5
pca.train(x)
y = pca.apply(x)
self.assertTrue(np.all(np.isfinite(y)))

# check I/O
index = faiss.index_factory(d, "PCAW60,Flat")
index = faiss.deserialize_index(faiss.serialize_index(index))
pca1 = faiss.downcast_VectorTransform(index.chain.at(0))
pca1.epsilon = 1e-5
index.train(x)
pca = faiss.downcast_VectorTransform(index.chain.at(0))
y = pca.apply(x)
self.assertTrue(np.all(np.isfinite(y)))


class TestRevSwigPtr(unittest.TestCase):
Expand Down

0 comments on commit b598f55

Please sign in to comment.