Skip to content

Commit

Permalink
Remove unneeded jsondiff output parsing
Browse files Browse the repository at this point in the history
Why these changes are being introduced:

Now that the DeepDiff library provides an explicit list of
modified "root" fields -- i.e. TIMDEX fields -- as a built-in
property, we no longer need any additional logic to parse the diff
and surface what fields were modified.

How this addresses that need:
* This removes the helper function generate_field_diff_bools_for_record()
and any tests related to it.

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-373
  • Loading branch information
ghukill committed Nov 4, 2024
1 parent f181958 commit edc5f80
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 48 deletions.
26 changes: 0 additions & 26 deletions abdiff/core/calc_ab_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,32 +104,6 @@ def create_record_diff_matrix_dataset(
return metrics_dataset


def generate_field_diff_bools_for_record(diff_data: dict) -> dict:
    """Return a dictionary flagging each field that has a diff.

    Determining if a field had a diff is as straightforward as looking to see
    if it shows up in the parsed diff JSON. The fields may be at the root of
    the diff, or they could be nested under "$insert" or "$delete" nodes in
    the diff.

    If a field from the original A/B records is not in the diff at all, then
    it did not have changes, and therefore will not receive a 1 here to
    indicate a diff.

    Args:
        diff_data: Parsed diff for a single record, keyed by field name
            and/or the "$insert" / "$delete" marker keys.

    Returns:
        Mapping of every modified field name to 1, in the order the fields
        are first encountered in the diff.
    """
    nested_markers = ("$insert", "$delete")
    fields_with_diffs: dict = {}

    for key in diff_data:
        # Marker keys wrap the modified fields one level down (iterating the
        # wrapped value yields the field names); any other key is itself the
        # modified field at the root of the diff.
        modified_fields = diff_data[key] if key in nested_markers else (key,)
        fields_with_diffs.update(dict.fromkeys(modified_fields, 1))

    return fields_with_diffs


def calculate_metrics_data(field_matrix_parquet: str) -> dict:
"""Create a dictionary of metrics via DuckDB queries."""
summary: dict = {}
Expand Down
22 changes: 0 additions & 22 deletions tests/test_calc_ab_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,10 @@
calc_ab_metrics,
calculate_metrics_data,
create_record_diff_matrix_dataset,
generate_field_diff_bools_for_record,
)
from abdiff.core.utils import load_dataset, read_run_json


def test_record_field_diffs_no_diffs():
    """An empty diff flags no fields."""
    diff_data = {}
    assert generate_field_diff_bools_for_record(diff_data) == {}


def test_record_field_diffs_one_diff():
    """A single root-level key in the diff is flagged with a 1."""
    diff_data = {"color": "green"}
    assert generate_field_diff_bools_for_record(diff_data) == {"color": 1}


def test_record_field_diffs_diff_from_inserts_and_deletes_counted_only_once():
    """Fields nested under "$insert" / "$delete" are each flagged exactly once."""
    diff_data = {
        "$insert": {"fruits": "strawberry"},
        "$delete": {"vegetables": "onion"},
    }
    assert generate_field_diff_bools_for_record(diff_data) == {
        "fruits": 1,
        "vegetables": 1,
    }


def test_sparse_matrix_dataset_created_success(run_directory, diffs_dataset_directory):
diff_matrix_dataset_filepath = create_record_diff_matrix_dataset(
run_directory, diffs_dataset_directory
Expand Down

0 comments on commit edc5f80

Please sign in to comment.