-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor imports and enhance result accumulation logic
- Updated import paths for `cos_sim` to use the new `utils` directory across multiple files, ensuring consistency and better organization. - Renamed `limit` to `first_n` in the `Collection` class to clarify its purpose in filtering results. - Removed deprecated `top_acc.js` file and introduced new utility functions for result accumulation in `results_acc.js`, improving the handling of top-k results. - Added integration tests for the new result accumulation functions to ensure their correctness and reliability. - Enhanced sorting logic in `DefaultEntitiesVectorAdapter` to maintain order when returning results. These changes improve code clarity, maintainability, and testing coverage.
- Loading branch information
Brian Joseph Petro
committed
Dec 23, 2024
1 parent
5e60e16
commit 7dee6f9
Showing
12 changed files
with
251 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/**
 * Accumulate the top-k (highest score) results in `_acc.results`.
 *
 * Mutates `_acc` in place and returns nothing. While the set holds fewer than
 * `ct` entries every new result is kept. Once the set is full, a result is kept
 * only if its score is strictly greater than `_acc.min`; the tracked minimum
 * entry is then evicted and the threshold is recomputed via `find_min`.
 *
 * @param {Object} _acc
 * @param {Set} _acc.results - The set of accumulated results so far.
 * @param {number} _acc.min - The currently known minimum score in the set.
 * @param {Object} _acc.minResult - The result object with the min score.
 * @param {Object} result - { item: <item>, score: <number> }.
 * @param {number} [ct=10] - The maximum number of results to keep.
 * @returns {void}
 *
 * NOTE: Caller should initialize _acc as:
 *   { results: new Set(), min: Number.POSITIVE_INFINITY, minResult: null }
 */
export function results_acc(_acc, result, ct = 10) {
  // A Set ignores re-adds of the same object. Without this guard the eviction
  // branch would delete the current min while gaining nothing, shrinking the
  // set below capacity.
  if (_acc.results.has(result)) return;

  // If under capacity, just add:
  if (_acc.results.size < ct) {
    _acc.results.add(result);

    // Refresh the threshold every time capacity is reached (not only the first
    // time), so it cannot go stale if the set re-fills, e.g. after `ct` grows.
    if (_acc.results.size === ct) {
      const { minScore, minObj } = find_min(_acc.results);
      _acc.min = minScore;
      _acc.minResult = minObj;
    }
    return;
  }

  // Already at capacity: only scores strictly above the known min get in.
  if (result.score > _acc.min) {
    _acc.results.add(result);
    // Remove the old min
    _acc.results.delete(_acc.minResult);

    // Recalculate the new min in the set
    const { minScore, minObj } = find_min(_acc.results);
    _acc.min = minScore;
    _acc.minResult = minObj;
  }
}
|
||
/**
 * Accumulate the top-k (lowest score) results in `_acc.results`.
 *
 * Mutates `_acc` in place and returns nothing. While the set holds fewer than
 * `ct` entries every new result is kept. Once the set is full, a result is kept
 * only if its score is strictly smaller than `_acc.max`; the tracked maximum
 * entry is then evicted and the threshold is recomputed via `find_max`.
 *
 * @param {Object} _acc
 * @param {Set} _acc.results - The set of accumulated results so far.
 * @param {number} _acc.max - The currently known maximum score in the set.
 * @param {Object} _acc.maxResult - The result object with the max score.
 * @param {Object} result - { item: <item>, score: <number> }.
 * @param {number} [ct=10] - The maximum number of results to keep.
 * @returns {void}
 *
 * NOTE: Caller should initialize _acc as:
 *   { results: new Set(), max: Number.NEGATIVE_INFINITY, maxResult: null }
 */
export function furthest_acc(_acc, result, ct = 10) {
  // A Set ignores re-adds of the same object. Without this guard the eviction
  // branch would delete the current max while gaining nothing, shrinking the
  // set below capacity.
  if (_acc.results.has(result)) return;

  // If under capacity, just add:
  if (_acc.results.size < ct) {
    _acc.results.add(result);

    // Refresh the threshold every time capacity is reached (not only the first
    // time), so it cannot go stale if the set re-fills, e.g. after `ct` grows.
    if (_acc.results.size === ct) {
      const { maxScore, maxObj } = find_max(_acc.results);
      _acc.max = maxScore;
      _acc.maxResult = maxObj;
    }
    return;
  }

  // Already at capacity: only scores strictly below the known max get in.
  if (result.score < _acc.max) {
    _acc.results.add(result);
    // Remove the old max
    _acc.results.delete(_acc.maxResult);

    // Recalculate the new max in the set
    const { maxScore, maxObj } = find_max(_acc.results);
    _acc.max = maxScore;
    _acc.maxResult = maxObj;
  }
}
|
||
/**
 * Helper to find the item with the smallest .score in a set of results.
 *
 * Ties are resolved in favour of the entry encountered first. Entries whose
 * score is NaN never compare as smaller and are therefore skipped.
 *
 * @param {Set} results - A set of objects like { item, score }
 * @returns {{ minScore: number, minObj: object }} For an empty set (or one with
 *   only NaN scores) this is { minScore: Infinity, minObj: null }.
 */
function find_min(results) {
  let minScore = Number.POSITIVE_INFINITY;
  let minObj = null;
  for (const obj of results) {
    // The seed is +Infinity, so a strict `<` alone would never select an entry
    // whose score is itself +Infinity; accept the first such entry so that a
    // non-empty set of comparable scores always yields one of its members.
    const matches_seed = minObj === null && obj.score === minScore;
    if (obj.score < minScore || matches_seed) {
      minScore = obj.score;
      minObj = obj;
    }
  }
  return { minScore, minObj };
}
|
||
/**
 * Helper to find the item with the largest .score in a set of results.
 *
 * Ties are resolved in favour of the entry encountered first. Entries whose
 * score is NaN never compare as larger and are therefore skipped.
 *
 * @param {Set} results - A set of objects like { item, score }
 * @returns {{ maxScore: number, maxObj: object }} For an empty set (or one with
 *   only NaN scores) this is { maxScore: -Infinity, maxObj: null }.
 */
function find_max(results) {
  let maxScore = Number.NEGATIVE_INFINITY;
  let maxObj = null;
  for (const obj of results) {
    // The seed is -Infinity, so a strict `>` alone would never select an entry
    // whose score is itself -Infinity; accept the first such entry so that a
    // non-empty set of comparable scores always yields one of its members.
    const matches_seed = maxObj === null && obj.score === maxScore;
    if (obj.score > maxScore || matches_seed) {
      maxScore = obj.score;
      maxObj = obj;
    }
  }
  return { maxScore, maxObj };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
/** | ||
* @file test_acc.js | ||
* @description Integration-level tests for results_acc and furthest_acc using AVA. | ||
* | ||
* To run: | ||
* npx ava test_acc.js | ||
*/ | ||
|
||
import test from 'ava'; | ||
import { results_acc, furthest_acc } from './results_acc.js'; | ||
|
||
/**
 * Builds a fresh accumulator object.
 *
 * The object carries both the `min`/`minResult` and the `max`/`maxResult`
 * tracking fields, each starting at its "nothing seen yet" sentinel, so the
 * tests below can pass it to either results_acc or furthest_acc. Every call
 * returns a new object with its own empty Set.
 *
 * @returns {{results: Set<any>, min: number, minResult: any, max: number, maxResult: any}}
 */
function build_acc() {
  return {
    results: new Set(),
    min: Number.POSITIVE_INFINITY,
    minResult: null,
    max: Number.NEGATIVE_INFINITY,
    maxResult: null,
  };
}
|
||
// Sorted score list of an accumulator, so assertions do not depend on Set order.
const sorted_scores = (acc) => [...acc.results].map((r) => r.score).sort((x, y) => x - y);

test('results_acc: adds items when below capacity', t => {
  const acc = build_acc();
  const ct = 3;

  results_acc(acc, { score: 10 }, ct);
  results_acc(acc, { score: 5 }, ct);

  t.is(acc.results.size, 2, 'Should add all items when below capacity');
  t.deepEqual([...acc.results], [{ score: 10 }, { score: 5 }]);
  // The threshold is only established once capacity is reached.
  t.is(acc.min, Infinity, 'Should not set a threshold before reaching capacity');
  t.is(acc.minResult, null);
});

test('results_acc: does not add new item if its score <= current min when at capacity', t => {
  const acc = build_acc();
  const ct = 3;

  // Fill to capacity
  results_acc(acc, { score: 10 }, ct);
  results_acc(acc, { score: 8 }, ct);
  results_acc(acc, { score: 6 }, ct);

  // Make sure min and minResult are correct
  t.is(acc.results.size, 3);
  t.is(acc.min, 6, 'Threshold should be the smallest score once at capacity');
  t.deepEqual(acc.minResult, { score: 6 });

  // Attempt to add item with score equal to the min
  const oldMin = acc.min;
  results_acc(acc, { score: oldMin }, ct);

  t.is(acc.results.size, 3, 'Should not add an item with score equal to the current min');

  // Attempt to add item with score strictly below the min
  results_acc(acc, { score: oldMin - 1 }, ct);

  t.is(acc.results.size, 3, 'Should not add an item with score below the current min');
  t.deepEqual(sorted_scores(acc), [6, 8, 10], 'Kept items should be unchanged');
  t.is(acc.min, oldMin, 'Threshold should be unchanged');
});

test('results_acc: replaces min item if new score is larger than current min when at capacity', t => {
  const acc = build_acc();
  const ct = 3;

  // Fill to capacity
  results_acc(acc, { score: 10 }, ct);
  results_acc(acc, { score: 8 }, ct);
  results_acc(acc, { score: 6 }, ct);

  t.is(acc.results.size, 3);

  // The current min is 6
  // Add a new item with a bigger score than 6
  results_acc(acc, { score: 9 }, ct);

  // Ensure we still have exactly 3 results
  t.is(acc.results.size, 3, 'Should remain at capacity');
  // The new min should not be the old min (6)
  t.false([...acc.results].some((item) => item.score === 6), 'Should have removed the old min item');
  t.deepEqual(sorted_scores(acc), [8, 9, 10], 'Should keep the three highest scores');
  t.is(acc.min, 8, 'Threshold should move up to the new smallest score');
  t.deepEqual(acc.minResult, { score: 8 });
});

test('furthest_acc: adds items when below capacity', t => {
  const acc = build_acc();
  const ct = 3;

  furthest_acc(acc, { score: 10 }, ct);
  furthest_acc(acc, { score: 20 }, ct);

  t.is(acc.results.size, 2, 'Should add all items when below capacity');
  t.deepEqual([...acc.results], [{ score: 10 }, { score: 20 }]);
  // The threshold is only established once capacity is reached.
  t.is(acc.max, -Infinity, 'Should not set a threshold before reaching capacity');
  t.is(acc.maxResult, null);
});

test('furthest_acc: does not add new item if its score >= current max when at capacity', t => {
  const acc = build_acc();
  const ct = 3;

  // Fill to capacity
  furthest_acc(acc, { score: 5 }, ct);
  furthest_acc(acc, { score: 10 }, ct);
  furthest_acc(acc, { score: 15 }, ct);

  t.is(acc.results.size, 3);
  t.is(acc.max, 15, 'Threshold should be the largest score once at capacity');
  t.deepEqual(acc.maxResult, { score: 15 });

  // Attempt to add item with score equal to the max
  const oldMax = acc.max;
  furthest_acc(acc, { score: oldMax }, ct);

  t.is(acc.results.size, 3, 'Should not add an item when new score >= current max');

  // Attempt to add item with score strictly above the max
  furthest_acc(acc, { score: oldMax + 1 }, ct);

  t.is(acc.results.size, 3, 'Should not add an item with score above the current max');
  t.deepEqual(sorted_scores(acc), [5, 10, 15], 'Kept items should be unchanged');
  t.is(acc.max, oldMax, 'Threshold should be unchanged');
});

test('furthest_acc: replaces max item if new score is smaller than current max when at capacity', t => {
  const acc = build_acc();
  const ct = 3;

  // Fill to capacity
  furthest_acc(acc, { score: 5 }, ct);
  furthest_acc(acc, { score: 10 }, ct);
  furthest_acc(acc, { score: 15 }, ct);

  t.is(acc.results.size, 3);

  // The current max is 15
  // Add a new item with a smaller score
  furthest_acc(acc, { score: 7 }, ct);

  // Ensure we still have exactly 3 results
  t.is(acc.results.size, 3, 'Should remain at capacity');
  // The old max (15) should be removed
  t.false([...acc.results].some((item) => item.score === 15), 'Should have removed the old max item');
  t.deepEqual(sorted_scores(acc), [5, 7, 10], 'Should keep the three lowest scores');
  t.is(acc.max, 10, 'Threshold should move down to the new largest score');
  t.deepEqual(acc.maxResult, { score: 10 });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.