Skip to content

Commit

Permalink
Refactor imports and enhance result accumulation logic
Browse files Browse the repository at this point in the history
- Updated import paths for `cos_sim` to use the new `utils` directory across multiple files, ensuring consistency and better organization.
- Renamed `limit` to `first_n` in the `Collection` class to clarify its purpose in filtering results.
- Removed deprecated `top_acc.js` file and introduced new utility functions for result accumulation in `results_acc.js`, improving the handling of top-k results.
- Added integration tests for the new result accumulation functions to ensure their correctness and reliability.
- Enhanced sorting logic in `DefaultEntitiesVectorAdapter` to maintain order when returning results.

These changes improve code clarity, maintainability, and testing coverage.
  • Loading branch information
Brian Joseph Petro committed Dec 23, 2024
1 parent 5e60e16 commit 7dee6f9
Show file tree
Hide file tree
Showing 12 changed files with 251 additions and 59 deletions.
2 changes: 1 addition & 1 deletion smart-clusters/smart_cluster.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { SmartGroup } from "smart-groups";
import { cos_sim } from "../smart-entities/cos_sim.js";
import { cos_sim } from "../smart-entities/utils/cos_sim.js";

export class SmartCluster extends SmartGroup {
static get defaults() {
Expand Down
2 changes: 1 addition & 1 deletion smart-clusters/utils/cluster_sources.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* repeated distanceFn calls for the same item pairs.
*/

import { cos_sim } from 'smart-entities/cos_sim.js';
import { cos_sim } from 'smart-entities/utils/cos_sim.js';
import { shuffle_array } from './shuffle_array.js';

/**
Expand Down
4 changes: 2 additions & 2 deletions smart-collections/collection.js
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,10 @@ export class Collection {
this.filter_opts = this.prepare_filter(filter_opts);

const results = [];
const { limit } = this.filter_opts;
const { first_n } = this.filter_opts;

for (const item of Object.values(this.items)) {
if (limit && results.length >= limit) break;
if (first_n && results.length >= first_n) break;
if (item.filter(filter_opts)) results.push(item);
}
return results;
Expand Down
10 changes: 5 additions & 5 deletions smart-entities/adapters/default.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
*/

import { EntitiesVectorAdapter, EntityVectorAdapter } from "./_adapter.js";
import { cos_sim } from "../cos_sim.js";
import { results_acc, furthest_acc } from "../top_acc.js";

import { cos_sim } from "../utils/cos_sim.js";
import { results_acc, furthest_acc } from "../utils/results_acc.js";
import { sort_by_score_ascending, sort_by_score_descending } from "../utils/sort_by_score.js";
/**
* @class DefaultEntitiesVectorAdapter
* @extends EntitiesVectorAdapter
Expand Down Expand Up @@ -43,7 +43,7 @@ export class DefaultEntitiesVectorAdapter extends EntitiesVectorAdapter {
results_acc(acc, result, limit); // update acc
return acc;
}, { min: 0, results: new Set() });
return Array.from(nearest.results);
return Array.from(nearest.results).sort(sort_by_score_descending);
}

/**
Expand All @@ -67,7 +67,7 @@ export class DefaultEntitiesVectorAdapter extends EntitiesVectorAdapter {
furthest_acc(acc, result, limit); // update acc
return acc;
}, { max: 0, results: new Set() });
return Array.from(furthest.results);
return Array.from(furthest.results).sort(sort_by_score_ascending);
}

/**
Expand Down
2 changes: 1 addition & 1 deletion smart-entities/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

import { SmartEntity } from "./smart_entity.js";
import { SmartEntities } from "./smart_entities.js";
import { cos_sim } from "./cos_sim.js";
import { cos_sim } from "./utils/cos_sim.js";

export {
SmartEntity,
Expand Down
18 changes: 4 additions & 14 deletions smart-entities/smart_entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -268,15 +268,14 @@ export class SmartEntities extends Collection {
...(params.filter || {}),
};
const results = await hyp_vecs.reduce(async (acc_promise, embedding, i) => {
const acc = await acc_promise;
const results = await this.nearest(embedding.vec, filter);
results.forEach(result => {
const acc = await acc_promise;
const results = await this.nearest(embedding.vec, filter);
results.forEach(result => {
if (!acc[result.item.path] || result.score > acc[result.item.path].score) {
acc[result.item.path] = {
key: result.item.key,
score: result.score,
item: result.item,
entity: result.item, // DEPRECATED: use item instead
hypothetical_i: i,
};
} else {
Expand All @@ -287,6 +286,7 @@ export class SmartEntities extends Collection {
return acc;
}, Promise.resolve({}));

console.log(results);
const top_k = Object.values(results)
.sort(sort_by_score)
.slice(0, limit)
Expand Down Expand Up @@ -355,16 +355,6 @@ export class SmartEntities extends Collection {
await this.process_load_queue();
}

async render_lookup(container, opts={}) {
if(container) container.innerHTML = 'Loading lookup...';
const frag = await this.env.render_component('lookup', this, opts);
if(container) {
container.innerHTML = '';
container.appendChild(frag);
}
return frag;
}

get connections_filter_config() { return connections_filter_config; }

}
Expand Down
32 changes: 0 additions & 32 deletions smart-entities/top_acc.js

This file was deleted.

File renamed without changes.
107 changes: 107 additions & 0 deletions smart-entities/utils/results_acc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/**
 * Accumulate the top-k (highest score) results in `_acc.results`.
 *
 * Mutates `_acc` in place and returns nothing. While the set holds fewer than
 * `ct` entries every result is kept; once it is full, a new result is kept
 * only if its score is strictly greater than the lowest score in the set, in
 * which case that lowest-scoring entry is evicted.
 *
 * @param {Object} _acc
 * @param {Set} _acc.results - The set of accumulated results so far.
 * @param {number} _acc.min - The currently known minimum score in the set.
 * @param {Object} _acc.minResult - The result object with the min score.
 * @param {Object} result - { item: <item>, score: <number> }.
 * @param {number} [ct=10] - The maximum number of results to keep.
 *
 * NOTE: The recommended initial accumulator is
 *   { results: new Set(), min: Number.POSITIVE_INFINITY, minResult: null }
 * but any starting `min` (e.g. `{ min: 0, results: new Set() }`) is accepted:
 * the threshold is always derived from the set itself when capacity is
 * reached, so a placeholder value can never leak into the comparison.
 */
export function results_acc(_acc, result, ct = 10) {
  // Under capacity: every result is kept.
  if (_acc.results.size < ct) {
    _acc.results.add(result);
    // On reaching capacity, seed the threshold from the set itself rather
    // than trusting whatever `min` the caller initialized the accumulator with.
    if (_acc.results.size === ct) refresh_min(_acc);
    return;
  }

  // At capacity with no valid tracked minimum (e.g. a pre-populated set):
  // derive it now so the eviction below removes a real member of the set.
  if (!_acc.results.has(_acc.minResult)) refresh_min(_acc);

  // Only a strictly larger score displaces the current minimum.
  if (result.score > _acc.min) {
    _acc.results.add(result);
    _acc.results.delete(_acc.minResult);
    // The evicted entry defined the threshold, so rescan for the new minimum.
    refresh_min(_acc);
  }
}

/**
 * Accumulate the bottom-k (lowest score) results in `_acc.results`.
 *
 * Mirror image of `results_acc`: mutates `_acc` in place, keeps every result
 * while under capacity, and afterwards keeps a new result only if its score
 * is strictly smaller than the highest score in the set, evicting that
 * highest-scoring entry.
 *
 * @param {Object} _acc
 * @param {Set} _acc.results - The set of accumulated results so far.
 * @param {number} _acc.max - The currently known maximum score in the set.
 * @param {Object} _acc.maxResult - The result object with the max score.
 * @param {Object} result - { item: <item>, score: <number> }.
 * @param {number} [ct=10] - The maximum number of results to keep.
 *
 * NOTE: The recommended initial accumulator is
 *   { results: new Set(), max: Number.NEGATIVE_INFINITY, maxResult: null }
 * but any starting `max` (e.g. `{ max: 0, results: new Set() }`) is accepted:
 * the threshold is always derived from the set itself when capacity is reached.
 */
export function furthest_acc(_acc, result, ct = 10) {
  // Under capacity: every result is kept.
  if (_acc.results.size < ct) {
    _acc.results.add(result);
    // On reaching capacity, seed the threshold from the set itself rather
    // than trusting whatever `max` the caller initialized the accumulator with.
    if (_acc.results.size === ct) refresh_max(_acc);
    return;
  }

  // At capacity with no valid tracked maximum: derive it before comparing.
  if (!_acc.results.has(_acc.maxResult)) refresh_max(_acc);

  // Only a strictly smaller score displaces the current maximum.
  if (result.score < _acc.max) {
    _acc.results.add(result);
    _acc.results.delete(_acc.maxResult);
    // The evicted entry defined the threshold, so rescan for the new maximum.
    refresh_max(_acc);
  }
}

/**
 * Re-derive `_acc.min` / `_acc.minResult` by scanning `_acc.results`.
 * @param {{ results: Set, min: number, minResult: object }} _acc
 */
function refresh_min(_acc) {
  const { minScore, minObj } = find_min(_acc.results);
  _acc.min = minScore;
  _acc.minResult = minObj;
}

/**
 * Re-derive `_acc.max` / `_acc.maxResult` by scanning `_acc.results`.
 * @param {{ results: Set, max: number, maxResult: object }} _acc
 */
function refresh_max(_acc) {
  const { maxScore, maxObj } = find_max(_acc.results);
  _acc.max = maxScore;
  _acc.maxResult = maxObj;
}

/**
 * Helper to find the item with the smallest .score in a set of results.
 * Returns `{ minScore: Infinity, minObj: null }` for an empty set.
 * @param {Set} results - A set of objects like { item, score }
 * @returns {{ minScore: number, minObj: object }}
 */
function find_min(results) {
  let minScore = Number.POSITIVE_INFINITY;
  let minObj = null;
  for (const obj of results) {
    if (obj.score < minScore) {
      minScore = obj.score;
      minObj = obj;
    }
  }
  return { minScore, minObj };
}

/**
 * Helper to find the item with the largest .score in a set of results.
 * Returns `{ maxScore: -Infinity, maxObj: null }` for an empty set.
 * @param {Set} results - A set of objects like { item, score }
 * @returns {{ maxScore: number, maxObj: object }}
 */
function find_max(results) {
  let maxScore = Number.NEGATIVE_INFINITY;
  let maxObj = null;
  for (const obj of results) {
    if (obj.score > maxScore) {
      maxScore = obj.score;
      maxObj = obj;
    }
  }
  return { maxScore, maxObj };
}
125 changes: 125 additions & 0 deletions smart-entities/utils/results_acc.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
* @file results_acc.test.js
* @description Integration-level tests for results_acc and furthest_acc using AVA.
*
* To run:
* npx ava results_acc.test.js
*/

import test from 'ava';
import { results_acc, furthest_acc } from './results_acc.js';

/**
 * Creates a brand-new accumulator usable by both results_acc and furthest_acc:
 * an empty result set, with the min tracker at +Infinity / null and the max
 * tracker at -Infinity / null.
 * @returns {{results: Set<any>, min: number, minResult: any, max: number, maxResult: any}}
 */
function build_acc() {
  const fresh = { results: new Set() };
  // Top-k (highest score) bookkeeping.
  fresh.min = Number.POSITIVE_INFINITY;
  fresh.minResult = null;
  // Bottom-k (lowest score) bookkeeping.
  fresh.max = Number.NEGATIVE_INFINITY;
  fresh.maxResult = null;
  return fresh;
}

// While fewer than `capacity` results are held, every result is kept in insertion order.
test('results_acc: adds items when below capacity', (t) => {
  const capacity = 3;
  const state = build_acc();

  for (const entry of [{ score: 10 }, { score: 5 }]) {
    results_acc(state, entry, capacity);
  }

  t.is(state.results.size, 2, 'Should add all items when below capacity');
  t.deepEqual(Array.from(state.results), [{ score: 10 }, { score: 5 }]);
});

// A full accumulator rejects a candidate whose score only ties the current minimum.
test('results_acc: does not add new item if its score <= current min when at capacity', (t) => {
  const capacity = 3;
  const state = build_acc();

  // Bring the accumulator up to capacity.
  for (const score of [10, 8, 6]) {
    results_acc(state, { score }, capacity);
  }

  // The min tracker must have been populated once the set filled up.
  t.is(state.results.size, 3);
  t.not(state.min, Infinity);
  t.not(state.minResult, null);

  // Offer a candidate that exactly ties the tracked minimum.
  const tied_score = state.min;
  results_acc(state, { score: tied_score }, capacity);

  t.is(state.results.size, 3, 'Should not add an item with score equal to the current min');
});

// A full accumulator swaps out its lowest entry for a strictly better candidate.
test('results_acc: replaces min item if new score is larger than current min when at capacity', (t) => {
  const capacity = 3;
  const state = build_acc();

  // Bring the accumulator up to capacity; the lowest score held is 6.
  for (const score of [10, 8, 6]) {
    results_acc(state, { score }, capacity);
  }

  t.is(state.results.size, 3);

  // 9 beats the current minimum of 6, so it should displace it.
  results_acc(state, { score: 9 }, capacity);

  // Size is unchanged because one entry came in and one went out.
  t.is(state.results.size, 3, 'Should remain at capacity');
  // The entry scoring 6 must be gone.
  const still_has_old_min = Array.from(state.results).some(({ score }) => score === 6);
  t.false(still_has_old_min, 'Should have removed the old min item');
});

// While fewer than `capacity` results are held, every result is kept in insertion order.
test('furthest_acc: adds items when below capacity', (t) => {
  const capacity = 3;
  const state = build_acc();

  for (const entry of [{ score: 10 }, { score: 20 }]) {
    furthest_acc(state, entry, capacity);
  }

  t.is(state.results.size, 2, 'Should add all items when below capacity');
  t.deepEqual(Array.from(state.results), [{ score: 10 }, { score: 20 }]);
});

// A full accumulator rejects a candidate whose score only ties the current maximum.
test('furthest_acc: does not add new item if its score >= current max when at capacity', (t) => {
  const capacity = 3;
  const state = build_acc();

  // Bring the accumulator up to capacity.
  for (const score of [5, 10, 15]) {
    furthest_acc(state, { score }, capacity);
  }

  // Offer a candidate that exactly ties the tracked maximum.
  const tied_score = state.max;
  furthest_acc(state, { score: tied_score }, capacity);

  t.is(state.results.size, 3, 'Should not add an item when new score >= current max');
});

// A full accumulator swaps out its highest entry for a strictly lower candidate.
test('furthest_acc: replaces max item if new score is smaller than current max when at capacity', (t) => {
  const capacity = 3;
  const state = build_acc();

  // Bring the accumulator up to capacity; the highest score held is 15.
  for (const score of [5, 10, 15]) {
    furthest_acc(state, { score }, capacity);
  }

  t.is(state.results.size, 3);

  // 7 is below the current maximum of 15, so it should displace it.
  furthest_acc(state, { score: 7 }, capacity);

  // Size is unchanged because one entry came in and one went out.
  t.is(state.results.size, 3, 'Should remain at capacity');
  // The entry scoring 15 must be gone.
  const still_has_old_max = Array.from(state.results).some(({ score }) => score === 15);
  t.false(still_has_old_max, 'Should have removed the old max item');
});
4 changes: 2 additions & 2 deletions smart-groups/adapters/vector/median_members.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { DefaultEntitiesVectorAdapter, DefaultEntityVectorAdapter } from "smart-entities/adapters/default.js";
import { sort_by_score_ascending, sort_by_score_descending } from "smart-entities/utils/sort_by_score.js";
import { cos_sim } from "smart-entities/cos_sim.js";
import { results_acc, furthest_acc } from "smart-entities/top_acc.js";
import { cos_sim } from "smart-entities/utils/cos_sim.js";
import { results_acc, furthest_acc } from "smart-entities/utils/results_acc.js";


export class MedianMemberVectorsAdapter extends DefaultEntitiesVectorAdapter {
Expand Down
Loading

0 comments on commit 7dee6f9

Please sign in to comment.