From 6716ce3e436ec4c5aa873f4fc5e4b92321893ab1 Mon Sep 17 00:00:00 2001
From: Jonathan Klabunde Tomer
Date: Sun, 9 Jan 2022 22:07:51 -0800
Subject: [PATCH] healthequity: ignore balance-after when merging

The healthequity source merges newly-downloaded transaction data into
the previously-saved file. This failed when there were multiple
transactions of the same type in a single day, because the order in
which HealthEquity reports transactions is not stable and so the running
balances were not either. The result was that past transactions would
sometimes be spontaneously duplicated in the list upon a new finance-dl
run.

This change causes the merge process to ignore the "Balance After"
column. This also means that the running balance within a day may end up
incorrect, if newly-available transactions happen to be listed before
previously-available ones in the new download. There's no really good
way to prevent this except either recalculating the balance-after column
ourselves after the merge or throwing it out entirely, neither of which
is proposed in this change.
---
 finance_dl/healthequity.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/finance_dl/healthequity.py b/finance_dl/healthequity.py
index 5d79319..9caa9a2 100644
--- a/finance_dl/healthequity.py
+++ b/finance_dl/healthequity.py
@@ -174,7 +174,11 @@ def write_transactions(raw_transactions_data, path):
         rows.append(row_values)
     rows.reverse()
     csv_merge.merge_into_file(filename=path, field_names=output_headers,
-                              data=rows, sort_by=lambda x: x['Date'])
+                              data=rows, sort_by=lambda x: x['Date'],
+                              # Don't consider balance-after in comparing rows,
+                              # because txn order (and therefore running
+                              # balance) is not stable across visits
+                              compare_fields = output_headers[0:3])
 
 
 class Scraper(scrape_lib.Scraper):