diff --git a/js/json.test.ts b/js/json.test.ts index 244f156..9982ea6 100644 --- a/js/json.test.ts +++ b/js/json.test.ts @@ -1,4 +1,6 @@ import { JSONDiff, ValidJSON } from "./json"; +import { NumericDiff } from "./number"; +import { ExactMatch } from "./value"; test("JSON String Test", async () => { const cases = [ @@ -105,3 +107,56 @@ test("Valid JSON Test", async () => { expect(score).toEqual(expected); } }); + +test("Semantic JSON Test", async () => { + const cases = [ + { a: '{"x": 1, "y": 2}', b: '{"y": 2, "x": 1}', expected: 1 }, + { + a: '{"zs": ["a", "b"], "x": 1, "y": 2}', + b: '{"y": 2, "zs": ["a", "b"], "x": 1}', + expected: 1, + }, + { + a: '{"o1": {"x": 1, "y": 2}}', + b: '{"o1": {"y": 2, "x": 1}}', + expected: 1, + }, + { + a: '{"xs": [{"o1": {"x": 1, "y": [2]}}]}', + b: '{"xs": [{"o1": {"y": [2], "x": 1}}]}', + expected: 1, + }, + { + a: '{"o1": {"x": 2, "y": 2}}', + b: '{"o1": {"y": 2, "x": 1}}', + expected: 0.83333, + }, + { + a: { o1: { x: 2, y: 2 } }, + b: '{"o1": {"y": 2, "x": 1}}', + expected: 0.83333, + }, + { a: '{"x": 1, "y": 2}', b: '{"x": 1, "z": 2}', expected: 0.3333 }, + { a: "[1, 2]", b: "[1, 2]", expected: 1 }, + { a: "[1, 2]", b: "[2, 1]", expected: 0.66667 }, + ]; + + for (const { a, b, expected } of cases) { + for (const exactNumber of [true, false]) { + const score = ( + await JSONDiff({ + output: a, + expected: b, + numberScorer: exactNumber ? ExactMatch : NumericDiff, + }) + ).score; + if (!exactNumber) { + expect(score).toBeCloseTo(expected); + } else { + expect(Math.round((score ?? 0) * 100)).toBeLessThanOrEqual( + Math.round(expected * 100), + ); + } + } + } +}); diff --git a/js/json.ts b/js/json.ts index c669067..1ecf9e2 100644 --- a/js/json.ts +++ b/js/json.ts @@ -10,17 +10,28 @@ import { makePartial, ScorerWithPartial } from "./partial"; */ export const JSONDiff: ScorerWithPartial< any, - { stringScorer?: Scorer; numberScorer?: Scorer } + { + stringScorer?: Scorer; + numberScorer?: Scorer; + preserveStrings?: boolean; + } > = makePartial( async ({ output, expected, stringScorer = LevenshteinScorer, numberScorer = NumericDiff, + preserveStrings = false, }) => { return { name: "JSONDiff", - score: await jsonDiff(output, expected, stringScorer, numberScorer), + score: await jsonDiff( + output, + expected, + stringScorer, + numberScorer, + preserveStrings, + ), }; }, "JSONDiff", @@ -42,9 +53,19 @@ export const ValidJSON: ScorerWithPartial = async function jsonDiff( o1: any, o2: any, - stringScorer: Scorer, - numberScorer: Scorer, + stringScorer: Scorer, + numberScorer: Scorer, + preserveStrings: boolean, ): Promise { + if (!preserveStrings) { + if (typeof o1 === "string" && validJSON(o1) === 1) { + o1 = JSON.parse(o1); + } + if (typeof o2 === "string" && validJSON(o2) === 1) { + o2 = JSON.parse(o2); + } + } + if (isObject(o1) && isObject(o2)) { if (Object.keys(o1).length == 0 && Object.keys(o2).length == 0) { return 1; @@ -58,9 +79,12 @@ async function jsonDiff( ), ); + // eslint-disable-next-line @typescript-eslint/consistent-type-assertions const baseScores = ( await Promise.all( - allKeys.map((k) => jsonDiff(o1[k], o2[k], stringScorer, numberScorer)), + allKeys.map((k) => + jsonDiff(o1[k], o2[k], stringScorer, numberScorer, preserveStrings), + ), ) ).filter((s) => s !== null) as number[]; return baseScores.reduce((acc, s) => acc + s, 0) / baseScores.length; @@ -69,11 +93,14 @@ async function jsonDiff( return 1; } + // eslint-disable-next-line @typescript-eslint/consistent-type-assertions const baseScores = ( await Promise.all( Array.from({ length: Math.min(o1.length, o2.length), - }).map((_, i) => jsonDiff(o1[i], o2[i], stringScorer, numberScorer)), + }).map((_, i) => + jsonDiff(o1[i], o2[i], stringScorer, numberScorer, preserveStrings), + ), ) ).filter((s) => s !== null) as number[]; return ( @@ -134,7 +161,9 @@ function validJSON(output: string, schema?: Schema | JSONSchemaType) { if (isObject(parsed) || isArray(parsed)) { return 1; } - } catch (err) {} + } catch { + // Ignore errors + } return 0; } diff --git a/py/autoevals/json.py b/py/autoevals/json.py index c9ba1ac..9a6abad 100644 --- a/py/autoevals/json.py +++ b/py/autoevals/json.py @@ -15,14 +15,22 @@ class JSONDiff(ScorerWithPartial): (defaults to Levenshtein) and numbers (defaults to NumericDiff). """ - def __init__(self, string_scorer: Scorer = None, number_scorer: Scorer = None): + def __init__(self, string_scorer: Scorer = None, number_scorer: Scorer = None, preserve_strings: bool = False): self.string_scorer = string_scorer or Levenshtein() self.number_scorer = number_scorer or NumericDiff() + self.preserve_strings = preserve_strings + self._valid_json = ValidJSON() def _run_eval_sync(self, output, expected=None, **kwargs): return Score(name=self._name(), score=self.json_diff(output, expected)) def json_diff(self, o1, o2): + if not self.preserve_strings: + if isinstance(o1, str) and self._valid_json.valid_json(o1) == 1: + o1 = json.loads(o1) + if isinstance(o2, str) and self._valid_json.valid_json(o2) == 1: + o2 = json.loads(o2) + if isinstance(o1, dict) and isinstance(o2, dict): if len(o1) == 0 and len(o2) == 0: return 1 diff --git a/py/autoevals/test_json.py b/py/autoevals/test_json.py index c8ae539..9ff0aab 100644 --- a/py/autoevals/test_json.py +++ b/py/autoevals/test_json.py @@ -1,6 +1,8 @@ from pytest import approx from autoevals.json import JSONDiff, ValidJSON +from autoevals.number import NumericDiff +from autoevals.value import ExactMatch def test_string_as_json(): @@ -98,3 +100,46 @@ def test_valid_json(): for output, expected, schema in cases: print(f"[{output}]", expected) assert evaluator(output, schema).score == expected + + +def test_semantic_json(): + cases = [ + ('{"x": 1, "y": 2}', '{"y": 2, "x": 1}', 1), + ( + '{"zs": ["a", "b"], "x": 1, "y": 2}', + '{"y": 2, "zs": ["a", "b"], "x": 1}', + 1, + ), + ( + '{"o1": {"x": 1, "y": 2}}', + '{"o1": {"y": 2, "x": 1}}', + 1, + ), + ( + '{"xs": [{"o1": {"x": 1, "y": [2]}}]}', + '{"xs": [{"o1": {"y": [2], "x": 1}}]}', + 1, + ), + ( + '{"o1": {"x": 2, "y": 2}}', + '{"o1": {"y": 2, "x": 1}}', + 0.83333, + ), + ( + {"o1": {"x": 2, "y": 2}}, + '{"o1": {"y": 2, "x": 1}}', + 0.83333, + ), + ('{"x": 1, "y": 2}', '{"x": 1, "z": 2}', 0.3333), + ("[1, 2]", "[1, 2]", 1), + ("[1, 2]", "[2, 1]", 0.66667), + ] + + evaluator = JSONDiff() + for a, b, expected in cases: + for exact_number in [True, False]: + score = evaluator(a, b, number_scorer=ExactMatch() if exact_number else NumericDiff()).score + if not exact_number: + assert abs(score - expected) < 0.0001 + else: + assert round(score * 100) <= round(expected * 100)