Skip to content

Commit

Permalink
Rewind smallPos in MultinomialSampler every time a bigPos is removed.
Browse files Browse the repository at this point in the history
* Also refine the unit test to run multiple iterations, so that it cannot pass merely because of a lucky random draw.
* Remove the previously added unit test, which is no longer used.
  • Loading branch information
reyoung committed Sep 22, 2016
1 parent 02c6cdf commit f63e641
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 59 deletions.
6 changes: 3 additions & 3 deletions paddle/gserver/layers/MultinomialSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ MultinomialSampler::MultinomialSampler(const real* prob, int size)
}

if (intervals_[bigPos].thresh < 1) {
bigPos = nextBigPos(0);
} else { // the big interval becomes a small interval.
smallPos = nextSmallPos(0);
// the big interval becomes a small interval.
bigPos = nextBigPos(bigPos + 1);
}
smallPos = nextSmallPos(0);
}

// Handle the inaccuracy caused by finite-precision arithmetic which
Expand Down
87 changes: 31 additions & 56 deletions paddle/gserver/tests/test_MultinomialSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License. */

#include <gtest/gtest.h>
#include <vector>
#include <stdlib.h>

#undef PADDLE_DISABLE_TIMER
#include "paddle/utils/Stat.h"
Expand All @@ -43,69 +42,45 @@ TEST(MultinomialSampler, gen) {
int numGrids = 1024 * 1024;
int size = 1024 * 4;
default_random_engine reng;
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}

CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;
for (size_t iter=0; iter < 256; ++iter) {
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}

vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;

vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
}
++counts[ret];
}
++counts[ret];
}
}
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
break;
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
LOG(INFO) << iter;
break;
}
}
}
}


// Checks the sampler on a small, heavily skewed set of integer weights:
// after drawing `sum` samples from evenly spaced deterministic inputs, each
// index must have been returned a number of times within 1 of its weight.
TEST(MultinomialSampler, larger_then_1) {
// Integer weights; two entries are 100x larger than the other three.
std::vector<int> probs = { 1, 100, 100, 1, 1};
// Same weights converted to `real`, the element type handed to the sampler.
std::vector<real> fProbs;
std::transform(probs.begin(), probs.end(),
std::back_insert_iterator<std::vector<real>>(fProbs),
[](int a){
return (real)a;
});

MultinomialSamplerTester sampler(fProbs.data(), probs.size());

// Total weight; also used as the number of samples drawn below.
int sum = std::accumulate(probs.begin(), probs.end(), 0);

// cnt[k] counts how many times index k was returned by the sampler.
std::vector<int> cnt(probs.size(), 0);


// Step between consecutive inputs, so that i * divides sweeps evenly over
// [0, probs.size()) as i runs from 0 to sum - 1.
double divides = (double)probs.size() / (double) sum;

for (int i = 0; i < sum; ++i) {
// The callback supplies i * divides in place of a random number
// (presumably testGen uses it as its random source -- confirm against
// MultinomialSamplerTester).
++cnt[sampler.testGen([&] {return i * divides;})];
}

// Each observed count may differ from its weight by at most 1.
for (size_t i=0; i < probs.size(); ++i) {
CHECK_LE(std::abs(cnt[i] - probs[i]), 1);
}
}

void benchmarkRandom() {
int n = 1024 * 1024;

Expand Down

0 comments on commit f63e641

Please sign in to comment.