From 3897a937ff1be529a45e90b160f1bc96da1cd46b Mon Sep 17 00:00:00 2001 From: talumbau Date: Mon, 14 Nov 2016 17:24:11 -0600 Subject: [PATCH 1/2] Addition totals calculated for dropq calculation - Instead of just a summary calculation of the delta between the policy and reform for total individual income tax, total payroll tax, and total combined tax for each budget year, we also compute these values for baseline and reform separately. --- taxcalc/dropq/dropq.py | 59 +++++++++++++++++++++++++++++++------ taxcalc/tests/test_dropq.py | 10 +++++++ 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/taxcalc/dropq/dropq.py b/taxcalc/dropq/dropq.py index 14303a790..c8800999c 100644 --- a/taxcalc/dropq/dropq.py +++ b/taxcalc/dropq/dropq.py @@ -32,6 +32,7 @@ total_row_names = ["ind_tax", "payroll_tax", "combined_tax"] + GDP_elast_row_names = ["gdp_elasticity"] ogusa_row_names = ["GDP", "Consumption", "Investment", "Hours Worked", "Wages", @@ -241,6 +242,7 @@ def groupby_means_and_comparisons(df1, df2, mask, debug=False): df1, df2 = drop_records(df1, df2, mask) + # Totals for diff between baseline and reform dec_sum = (df2['tax_diff_dec'] * df2['s006']).sum() bin_sum = (df2['tax_diff_bin'] * df2['s006']).sum() pr_dec_sum = (df2['payrolltax_diff_dec'] * df2['s006']).sum() @@ -248,6 +250,18 @@ def groupby_means_and_comparisons(df1, df2, mask, debug=False): combined_dec_sum = (df2['combined_diff_dec'] * df2['s006']).sum() combined_bin_sum = (df2['combined_diff_bin'] * df2['s006']).sum() + # Totals for baseline + sum_baseline = (df1['_iitax'] * df1['s006']).sum() + pr_sum_baseline = (df1['_payrolltax'] * df1['s006']).sum() + combined_sum_baseline = (df1['_combined'] * df1['s006']).sum() + + # Totals for reform + sum_reform = (df2['_iitax_dec'] * df2['s006']).sum() + pr_sum_reform = (df2['_payrolltax_dec'] * df2['s006']).sum() + combined_sum_reform = (df2['_combined_dec'] * df2['s006']).sum() + + # Totals for reform + # Create Difference tables, grouped by deciles and bins diffs_dec = dropq_diff_table(df1, df2, groupby="weighted_deciles", @@ -299,7 +313,9 @@ def groupby_means_and_comparisons(df1, df2, mask, debug=False): return (mY_dec, mX_dec, diffs_dec, pr_diffs_dec, comb_diffs_dec, mY_bin, mX_bin, diffs_bin, pr_diffs_bin, comb_diffs_bin, - dec_sum, pr_dec_sum, combined_dec_sum) + dec_sum, pr_dec_sum, combined_dec_sum, sum_baseline, + pr_sum_baseline, combined_sum_baseline, sum_reform, + pr_sum_reform, combined_sum_reform) def results(c): @@ -503,7 +519,9 @@ def run_nth_year(year_n, start_year, is_strict, tax_dta="", user_mods="", # Means of plan Y by income bin # diffs of plan Y by income bin mY_dec, mX_dec, df_dec, pdf_dec, cdf_dec, mY_bin, mX_bin, df_bin, \ - pdf_bin, cdf_bin, diff_sum, payrolltax_diff_sum, combined_diff_sum = \ + pdf_bin, cdf_bin, diff_sum, payrolltax_diff_sum, combined_diff_sum, \ + sum_baseline, pr_sum_baseline, combined_sum_baseline, sum_reform, \ + pr_sum_reform, combined_sum_reform =\ groupby_means_and_comparisons(soit_baseline, soit_reform, mask) elapsed_time = time.time() - start_time @@ -511,7 +529,15 @@ def run_nth_year(year_n, start_year, is_strict, tax_dta="", user_mods="", start_year += 1 tots = [diff_sum, payrolltax_diff_sum, combined_diff_sum] - fiscal_tots = pd.DataFrame(data=tots, index=total_row_names) + fiscal_tots_diff = pd.DataFrame(data=tots, index=total_row_names) + + tots_baseline = [sum_baseline, pr_sum_baseline, combined_sum_baseline] + fiscal_tots_baseline = pd.DataFrame(data=tots_baseline, + index=total_row_names) + + tots_reform = [sum_reform, pr_sum_reform, combined_sum_reform] + fiscal_tots_reform = pd.DataFrame(data=tots_reform, + index=total_row_names) # Get rid of negative incomes df_bin.drop(df_bin.index[0], inplace=True) @@ -529,7 +555,9 @@ def append_year(x): append_year(pdf_dec), append_year(cdf_dec), append_year(mY_bin), append_year(mX_bin), append_year(df_bin), append_year(pdf_bin), append_year(cdf_bin), - append_year(fiscal_tots)) + append_year(fiscal_tots_diff), + append_year(fiscal_tots_baseline), + append_year(fiscal_tots_reform)) decile_row_names_i = [x + '_' + str(year_n) for x in decile_row_names] @@ -577,14 +605,24 @@ def append_year(x): row_names=bin_row_names_i, column_types=diff_column_types) - fiscal_yr_total = create_json_table(fiscal_tots, - row_names=total_row_names_i) + fiscal_yr_total_df = create_json_table(fiscal_tots_diff, + row_names=total_row_names_i) + + fiscal_yr_total_bl = create_json_table(fiscal_tots_baseline, + row_names=total_row_names_i) + + fiscal_yr_total_rf = create_json_table(fiscal_tots_reform, + row_names=total_row_names_i) + # Make the one-item lists of strings just strings - fiscal_yr_total = dict((k, v[0]) for k, v in fiscal_yr_total.items()) + fiscal_yr_total_df = dict((k, v[0]) for k, v in fiscal_yr_total_df.items()) + fiscal_yr_total_bl = dict((k, v[0]) for k, v in fiscal_yr_total_bl.items()) + fiscal_yr_total_rf = dict((k, v[0]) for k, v in fiscal_yr_total_rf.items()) return (mY_dec_table_i, mX_dec_table_i, df_dec_table_i, pdf_dec_table_i, cdf_dec_table_i, mY_bin_table_i, mX_bin_table_i, df_bin_table_i, - pdf_bin_table_i, cdf_bin_table_i, fiscal_yr_total) + pdf_bin_table_i, cdf_bin_table_i, fiscal_yr_total_df, + fiscal_yr_total_bl, fiscal_yr_total_rf) def run_models(tax_dta, start_year, is_strict=False, user_mods="", @@ -614,9 +652,12 @@ def run_models(tax_dta, start_year, is_strict=False, user_mods="", (mY_dec_table_i, mX_dec_table_i, df_dec_table_i, pdf_dec_table_i, cdf_dec_table_i, mY_bin_table_i, mX_bin_table_i, df_bin_table_i, - pdf_bin_table_i, cdf_bin_table_i, num_fiscal_year_total) = json_tables + pdf_bin_table_i, cdf_bin_table_i, num_fiscal_year_total, + num_fiscal_year_total_bl, num_fiscal_year_total_rf) = json_tables num_fiscal_year_totals.append(num_fiscal_year_total) + num_fiscal_year_totals.append(num_fiscal_year_total_bl) + num_fiscal_year_totals.append(num_fiscal_year_total_rf) mY_dec_table.update(mY_dec_table_i) mX_dec_table.update(mX_dec_table_i) df_dec_table.update(df_dec_table_i) diff --git a/taxcalc/tests/test_dropq.py b/taxcalc/tests/test_dropq.py index f3e977af4..910bbb61e 100644 --- a/taxcalc/tests/test_dropq.py +++ b/taxcalc/tests/test_dropq.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd import pytest +import numpy.testing as npt from pandas import DataFrame, Series from taxcalc.dropq.dropq_utils import * @@ -103,6 +104,15 @@ def test_full_dropq_puf(puf_path): # Assert that dropq revenue is similar to the "pure" calculation assert diff / dropq_reform_revenue < 0.02 + # Assert that Reform - Baseline = Reported Delta + delta_yr0 = fiscal_tots[0] + baseline_yr0 = fiscal_tots[1] + reform_yr0 = fiscal_tots[2] + diff_yr0 = (reform_yr0.loc['combined_tax'] - + baseline_yr0.loc['combined_tax']).values + delta_yr0 = delta_yr0.loc['combined_tax'].values + npt.assert_array_almost_equal(diff_yr0, delta_yr0, decimal=3) + @pytest.mark.parametrize("is_strict, rjson, growth_params, no_elast", [(True, True, False, False), (True, True, True, True), From 48c83c2ad0c0a4e540492c42b952b677b32075c3 Mon Sep 17 00:00:00 2001 From: talumbau Date: Tue, 15 Nov 2016 06:29:22 -0600 Subject: [PATCH 2/2] remove backslash line continuation --- taxcalc/dropq/dropq.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/taxcalc/dropq/dropq.py b/taxcalc/dropq/dropq.py index c8800999c..0dd20b954 100644 --- a/taxcalc/dropq/dropq.py +++ b/taxcalc/dropq/dropq.py @@ -518,11 +518,12 @@ def run_nth_year(year_n, start_year, is_strict, tax_dta="", user_mods="", # diffs of plan Y by decile # Means of plan Y by income bin # diffs of plan Y by income bin - mY_dec, mX_dec, df_dec, pdf_dec, cdf_dec, mY_bin, mX_bin, df_bin, \ - pdf_bin, cdf_bin, diff_sum, payrolltax_diff_sum, combined_diff_sum, \ - sum_baseline, pr_sum_baseline, combined_sum_baseline, sum_reform, \ - pr_sum_reform, combined_sum_reform =\ - groupby_means_and_comparisons(soit_baseline, soit_reform, mask) + (mY_dec, mX_dec, df_dec, pdf_dec, cdf_dec, mY_bin, mX_bin, df_bin, + pdf_bin, cdf_bin, diff_sum, payrolltax_diff_sum, combined_diff_sum, + sum_baseline, pr_sum_baseline, combined_sum_baseline, sum_reform, + pr_sum_reform, + combined_sum_reform) = groupby_means_and_comparisons(soit_baseline, + soit_reform, mask) elapsed_time = time.time() - start_time print("elapsed time for this run: ", elapsed_time)