Skip to content
This repository has been archived by the owner on Dec 3, 2019. It is now read-only.

Commit

Permalink
Implement Earth Mover's Distance in Catapult
Browse files Browse the repository at this point in the history
The Earth Mover's Distance is a measure of the distance between
two distributions. It has many applications;
for example, it is frequently used as a measure of similarity
between two images by comparing their color histograms.

Bug: chromium:985773
Change-Id: I5e1c38a321115772955c4409c507736fe5a51d3b
Reviewed-on: https://chromium-review.googlesource.com/c/catapult/+/1709719
Reviewed-by: Ben Hayden <[email protected]>
Reviewed-by: Nicolás Peña Moreno <[email protected]>
Reviewed-by: Deepanjan Roy <[email protected]>
Commit-Queue: Rasha Nasri <[email protected]>
  • Loading branch information
Rasha-Nasri authored and Commit Bot committed Jul 19, 2019
1 parent 58751aa commit 53913ce
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 0 deletions.
61 changes: 61 additions & 0 deletions tracing/tracing/base/math/earth_movers_distance.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<!DOCTYPE html>
<!--
Copyright (c) 2019 The Chromium Authors. All rights reserved.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.
-->
<script>
'use strict';

tr.exportTo('tr.b.math', function() {
/**
 * Computes the Earth Mover's Distance (EMD), also known as the Wasserstein
 * distance, between two one-dimensional histograms.
 *
 * The EMD between two distributions is proportional to the minimum amount
 * of work required to change one distribution into the other, where one
 * unit of work moves one unit of 'dirt' by one unit of distance (here: one
 * bin).
 * For one-dimensional distributions that are non-negative and of the same
 * size (contain the same amount of dirt), there is a closed-form formula:
 * the EMD equals the area between the CDFs of the two distributions. This
 * function accumulates that area bin by bin.
 *
 * http://infolab.stanford.edu/pub/cstr/reports/cs/tr/99/1620/CS-TR-99-1620.ch4.pdf
 *
 * Each histogram is an array of numbers where the n-th element is the
 * weight of the n-th bin. Total weights are compared with a small
 * tolerance for floating-point rounding, so that histograms holding the
 * same weights in a different order are not rejected.
 *
 * @param {number[]} firstHistogram histogram of the first distribution.
 * @param {number[]} secondHistogram histogram of the second distribution.
 * @return {number} the Earth Mover's Distance between the two histograms;
 *     0 for empty or identical histograms.
 * @throws {Error} if the histograms have a different number of bins.
 * @throws {Error} if the histograms' total weights differ by more than
 *     floating-point rounding error.
 */
function earthMoversDistance(firstHistogram, secondHistogram) {
  const buckets = firstHistogram.length;
  if (secondHistogram.length !== buckets) {
    throw new Error('Histograms have a different number of bins.');
  }

  const arrSum = arr => arr.reduce((a, b) => a + b, 0);
  const firstSum = arrSum(firstHistogram);
  const secondSum = arrSum(secondHistogram);
  // Floating-point addition is not associative, so two histograms with the
  // same total weight can produce sums that differ in the last bits (e.g.
  // 0.1 + 0.2 + 0.3 !== 0.3 + 0.2 + 0.1). Allow a difference no larger than
  // the rounding error that summing `buckets` non-negative weights can
  // accumulate. For integer weights this bound stays far below 1, so any
  // real mismatch is still rejected.
  const tolerance = Number.EPSILON * buckets *
      Math.max(Math.abs(firstSum), Math.abs(secondSum));
  // The exact comparison comes first so that everything accepted by a strict
  // equality check (including equal infinite sums) is still accepted. The
  // negated `<=` makes NaN sums fail the check instead of slipping through.
  if (firstSum !== secondSum &&
      !(Math.abs(firstSum - secondSum) <= tolerance)) {
    throw new Error('The histograms\' sizes don\'t match.');
  }

  let total = 0;
  // Running difference between the two CDFs at the current bin, i.e. the
  // amount of dirt that still has to be carried across to the next bin.
  let remainder = 0;
  for (let bucket = 0; bucket < buckets; bucket++) {
    remainder += secondHistogram[bucket] - firstHistogram[bucket];
    total += Math.abs(remainder);
  }
  return total;
}
return {
earthMoversDistance,
};
});
</script>
50 changes: 50 additions & 0 deletions tracing/tracing/base/math/earth_movers_distance_test.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<!DOCTYPE html>
<!--
Copyright (c) 2019 The Chromium Authors. All rights reserved.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.
-->
<link rel="import" href="/tracing/base/math/earth_movers_distance.html">

<script>
'use strict';

// Unit tests for tr.b.math.earthMoversDistance.
// Note: assert.strictEqual is called as (actual, expected) so that a failing
// assertion reports the computed distance as "actual" and the literal as
// "expected".
tr.b.unittest.testSuite(function() {
  // Histograms with 3 and 4 bins: the call is expected to throw.
  test('differentBinsNumberHistograms', function() {
    const firstHistogram = [1, 2, 3];
    const secondHistogram = [3, 1, 1, 3];
    assert.throws(() =>
      tr.b.math.earthMoversDistance(firstHistogram, secondHistogram),
    'Histograms have a different number of bins.');
  });

  // Same number of bins but total weights 7 and 8: the call is expected to
  // throw.
  test('differentSizeHistograms', function() {
    const firstHistogram = [1, 2, 3, 1];
    const secondHistogram = [3, 1, 1, 3];
    assert.throws(() =>
      tr.b.math.earthMoversDistance(firstHistogram, secondHistogram),
    'The histograms\' sizes don\'t match.');
  });

  // Two empty histograms are at distance 0.
  test('emptyHistograms', function() {
    const firstHistogram = [];
    const secondHistogram = [];
    assert.strictEqual(
        tr.b.math.earthMoversDistance(firstHistogram, secondHistogram), 0);
  });

  // Identical histograms are at distance 0.
  test('sameHistograms', function() {
    const firstHistogram = [3, 1, 1, 3];
    const secondHistogram = [3, 1, 1, 3];
    assert.strictEqual(
        tr.b.math.earthMoversDistance(firstHistogram, secondHistogram), 0);
  });

  // Running CDF differences are 2, 1, -1, 0, so the distance is
  // |2| + |1| + |-1| + |0| = 4.
  test('generalCaseHistograms', function() {
    const firstHistogram = [1, 2, 3, 2];
    const secondHistogram = [3, 1, 1, 3];
    assert.strictEqual(
        tr.b.math.earthMoversDistance(firstHistogram, secondHistogram), 4);
  });
});
</script>

0 comments on commit 53913ce

Please sign in to comment.