Merge pull request #31 from PSLmodels/master

update from forked repo
bodiyang · Feb 12, 2024 · e4970fc · e4970fc
2 parents 9f010c4 + abd503e
commit e4970fc
Show file tree

Hide file tree

Showing 16 changed files with 463 additions and 31 deletions.
diff --git a/Makefile b/Makefile
@@ -32,7 +32,7 @@ help:
 clean:
 	@find . -name *pyc -exec rm {} \;
 	@find . -name *cache -maxdepth 1 -exec rm -r {} \;
-	@conda uninstall taxcalc --yes --quiet 2>&1 > /dev/null
+	@pip uninstall taxcalc --yes --quiet 2>&1 > /dev/null
 
 .PHONY=package
 package:
@@ -51,7 +51,7 @@ endef
 .PHONY=pytest-cps
 pytest-cps:
 	@$(pytest-setup)
-	@cd taxcalc ; pytest -n4 -m "not requires_pufcsv and not pre_release"
+	@cd taxcalc ; pytest -n4 --disable-warnings -m "not requires_pufcsv and not pre_release"
 	@$(pytest-cleanup)
 
 .PHONY=pytest

diff --git a/docs/about/releases.md b/docs/about/releases.md
@@ -3,6 +3,27 @@ Release history
 Go [here](https://github.com/PSLmodels/Tax-Calculator/pulls?q=is%3Apr+is%3Aclosed)
 for a complete commit history.
 
+2024-02-10 Release 3.5.0
+------------------------
+(last merged pull request is
+[#2715](https://github.com/PSLmodels/Tax-Calculator/pull/2715))
+
+**This is an enhancement and bug-fix release.**
+
+**API Changes**
+
+**New Features**
+- Add baseline table output to `cli` command [[#2714](https://github.com/PSLmodels/Tax-Calculator/pull/2714) by Martin Holmer]
+- Additional TAXSIM-35 validation tools, several PRs by Bodi Yang and Jason DeBacker
+
+**Bug Fixes**
+- Avoid Pandas deprecation warnings [[#2715](https://github.com/PSLmodels/Tax-Calculator/pull/2715) by Martin Holmer]
+- Correctly account for the `odc` variable as refundable in 2021 [[#2703](https://github.com/PSLmodels/Tax-Calculator/pull/2704) by Bodi Yang]
+- Fix incorrect value for `EITC_ps_MarriedJ` in 2020 [[#2699](https://github.com/PSLmodels/Tax-Calculator/pull/2699) by Bodi Yang]
+- Fix incorrect value for ACTC amount for 2023-2025 to reflect inflation adjustment [[#2691](https://github.com/PSLmodels/Tax-Calculator/pull/2691) by Jason DeBacker]
+
+
+
 2023-06-20 Release 3.4.1
 ------------------------
 (last merged pull request is

diff --git a/docs/index.md b/docs/index.md
@@ -51,7 +51,7 @@ The cross-model validation work with NBER's TAXSIM-27 model is described
 
 ## Latest release
 
-{doc}`3.4.1 (2023-06-20) <about/releases>`
+{doc}`3.5.0 (2024-02-10) <about/releases>`
 
 If you are already using Tax-Calculator, upgrade using the following command:
 

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 with open('README.md') as f:
     longdesc = f.read()
 
-version = '3.4.1'
+version = '3.5.0'
 
 config = {
     'description': 'Tax Calculator',

diff --git a/taxcalc.egg-info/PKG-INFO b/taxcalc.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: taxcalc
-Version: 3.4.1
+Version: 3.5.0
 Summary: taxcalc
 Home-page: https://github.com/PSLmodels/Tax-Calculator
 Download-URL: https://github.com/PSLmodels/Tax-Calculator
@@ -16,6 +16,12 @@ Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
+License-File: LICENSE.md
+Requires-Dist: setuptools
+Requires-Dist: numpy
+Requires-Dist: pandas
+Requires-Dist: bokeh
+Requires-Dist: numba
 
 [![PSL cataloged](https://img.shields.io/badge/PSL-cataloged-a0a0a0.svg)](https://www.PSLmodels.org)
 [![Python 3.6+](https://img.shields.io/badge/python-3.6%2B-blue.svg)](https://www.python.org/downloads/release/python-360/)

diff --git a/taxcalc.egg-info/SOURCES.txt b/taxcalc.egg-info/SOURCES.txt
@@ -1,3 +1,4 @@
+LICENSE.md
 MANIFEST.in
 README.md
 setup.py

diff --git a/taxcalc/__init__.py b/taxcalc/__init__.py
@@ -14,4 +14,4 @@
 from taxcalc.utils import *
 from taxcalc.cli import *
 
-__version__ = '3.4.1'
+__version__ = '3.5.0'
diff --git a/taxcalc/calcfunctions.py b/taxcalc/calcfunctions.py
@@ -577,7 +577,7 @@ def CapGains(p23250, p22250, sep, ALD_StudentLoan_hc,
 
 
 @iterate_jit(nopython=True)
-def SSBenefits(MARS, ymod, e02400, SS_thd50, SS_thd85,
+def SSBenefits(MARS, ymod, e02400, SS_all_in_agi, SS_thd50, SS_thd85,
                SS_percentage1, SS_percentage2, c02500):
     """
     Calculates OASDI benefits included in AGI, c02500.
@@ -590,6 +590,8 @@ def SSBenefits(MARS, ymod, e02400, SS_thd50, SS_thd85,
         Variable that is used in OASDI benefit taxation logic
     e02400: float
         Total social security (OASDI) benefits
+    SS_all_in_agi: bool
+        Whether all social security benefits are included in AGI
     SS_thd50: list
         Threshold for social security benefit taxability (1)
     SS_thd85: list
@@ -615,6 +617,8 @@ def SSBenefits(MARS, ymod, e02400, SS_thd50, SS_thd85,
                      SS_percentage1 *
                      min(e02400, SS_thd85[MARS - 1] -
                          SS_thd50[MARS - 1]), SS_percentage2 * e02400)
+    if SS_all_in_agi:
+        c02500 = e02400
     return c02500
 
 
@@ -1595,7 +1599,7 @@ def GainsTax(e00650, c01000, c23650, p23250, e01100, e58990, e00200,
              CG_nodiff, PT_EligibleRate_active, PT_EligibleRate_passive,
              PT_wages_active_income, PT_top_stacking,
              CG_rt1, CG_rt2, CG_rt3, CG_rt4, CG_brk1, CG_brk2, CG_brk3,
-             dwks10, dwks13, dwks14, dwks19, c05700, taxbc):
+             dwks10, dwks13, dwks14, dwks19, dwks43, c05700, taxbc):
     """
     GainsTax function implements (2015) Schedule D Tax Worksheet logic for
     the special taxation of long-term capital gains and qualified dividends
@@ -1725,6 +1729,8 @@ def GainsTax(e00650, c01000, c23650, p23250, e01100, e58990, e00200,
         Maximum of 0 and dwks1 - dwks13
     dwks19: float
         Maximum of dwks17 and dwks16
+    dwks43: float
+        separate tax on long-term capital gains and qualified dividends
     c05700: float
         Lump sum distributions
     taxbc: float
@@ -1740,6 +1746,8 @@ def GainsTax(e00650, c01000, c23650, p23250, e01100, e58990, e00200,
         Maximum of 0 and dwks1 - dwks13
     dwks19: float
         Maximum of dwks17 and dwks16
+    dwks43: float
+        separate tax on long-term capital gains and qualified dividends
     c05700: float
         Lump sum distributions
     taxbc: float
@@ -1834,12 +1842,13 @@ def GainsTax(e00650, c01000, c23650, p23250, e01100, e58990, e00200,
         dwks13 = 0.
         dwks14 = 0.
         dwks19 = 0.
+        dwks43 = 0.
 
     # final calculations done no matter what the value of hasqdivltcg
     c05100 = c24580  # because foreign earned income exclusion is assumed zero
     c05700 = 0.  # no Form 4972, Lump Sum Distributions
     taxbc = c05700 + c05100
-    return (dwks10, dwks13, dwks14, dwks19, c05700, taxbc)
+    return (dwks10, dwks13, dwks14, dwks19, dwks43, c05700, taxbc)
 
 
 @iterate_jit(nopython=True)

diff --git a/taxcalc/cli/tc.py b/taxcalc/cli/tc.py
@@ -109,7 +109,8 @@ def cli_tc_main():
                         default=None)
     parser.add_argument('--sqldb',
                         help=('optional flag that writes SQLite database '
-                              'with dump table containing same output as '
+                              'with two tables (baseline and reform) each '
+                              'containing same output variables as '
                               'produced by --dump option.'),
                         default=False,
                         action="store_true")

diff --git a/taxcalc/policy_current_law.json b/taxcalc/policy_current_law.json
@@ -356,6 +356,32 @@
             "cps": true
         }
     },
+    "SS_all_in_agi": {
+        "title": "Include all social security benefits in AGI",
+        "description": "All social security benefits will be included in AGI.",
+        "notes": "",
+        "section_1": "Social Security Taxability",
+        "section_2": "Social Security Benefit Taxability",
+        "indexable": false,
+        "indexed": false,
+        "type": "bool",
+        "value": [
+            {
+                "year": 2013,
+                "value": false
+            }
+        ],
+        "validators": {
+            "range": {
+                "min": false,
+                "max": true
+            }
+        },
+        "compatible_data": {
+            "puf": true,
+            "cps": true
+        }
+    },
     "SS_thd50": {
         "title": "Threshold for Social Security benefit taxability 1",
         "description": "The first threshold for Social Security benefit taxability: if taxpayers have provisional income greater than this threshold, up to rate 1 of their Social Security benefit will be subject to tax under current law.",
@@ -20666,4 +20692,4 @@
             "cps": true
         }
     }
-}
+}
diff --git a/taxcalc/records.py b/taxcalc/records.py
@@ -350,7 +350,7 @@ def _adjust(self, year):
         # pylint: disable=no-member
         if self.ADJ.size > 0:
             # Interest income
-            self.e00300 *= self.ADJ['INT{}'.format(year)][self.agi_bin].values
+            self.e00300 *= self.ADJ[f'INT{year}'].iloc[self.agi_bin].values
 
     def _read_ratios(self, ratios):
         """

diff --git a/taxcalc/records_variables.json b/taxcalc/records_variables.json
@@ -1081,6 +1081,11 @@
       "desc": "search taxcalc/calcfunctions.py for how calculated and used",
       "form": {"2013-20??": "calculated variable"}
     },
+    "dwks43": {
+      "type": "float",
+      "desc": "separate tax on long-term capital gains and qualified dividends",
+      "form": {"2013-20??": "calculated variable"}
+    },
     "fstax": {
       "type": "float",
       "desc": "search taxcalc/calcfunctions.py for how calculated and used",

diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
@@ -427,8 +427,9 @@ def analyze(self, writing_output_file=False,
            calculated variables using their Tax-Calculator names
 
         output_sqldb: boolean
-           whether or not to write SQLite3 database with dump table
-           containing same output as written by output_dump to a csv file
+           whether or not to write SQLite3 database with two tables
+           (baseline and reform) each containing same output as written
+           by output_dump to a csv file
 
         Returns
         -------
@@ -449,18 +450,28 @@ def analyze(self, writing_output_file=False,
             (mtr_paytax, mtr_inctax,
              _) = self.calc.mtr(wrt_full_compensation=False,
                                 calc_all_already_called=True)
+            self.calc_base.calc_all()
+            calc_base_calculated = True
+            (mtr_paytax_base, mtr_inctax_base,
+             _) = self.calc_base.mtr(wrt_full_compensation=False,
+                                     calc_all_already_called=True)
         else:
             # definitely do not need marginal tax rates
             mtr_paytax = None
             mtr_inctax = None
+            mtr_paytax_base = None
+            mtr_inctax_base = None
         # extract output if writing_output_file
         if writing_output_file:
             self.write_output_file(output_dump, dump_varset,
                                    mtr_paytax, mtr_inctax)
             self.write_doc_file()
         # optionally write --sqldb output to SQLite3 database
         if output_sqldb:
-            self.write_sqldb_file(dump_varset, mtr_paytax, mtr_inctax)
+            self.write_sqldb_file(
+                dump_varset, mtr_paytax, mtr_inctax,
+                mtr_paytax_base, mtr_inctax_base
+            )
         # optionally write --tables output to text file
         if output_tables:
             if not calc_base_calculated:
@@ -480,7 +491,9 @@ def write_output_file(self, output_dump, dump_varset,
         Write output to CSV-formatted file.
         """
         if output_dump:
-            outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax)
+            outdf = self.dump_output(
+                self.calc, dump_varset, mtr_inctax, mtr_paytax
+            )
             column_order = sorted(outdf.columns)
         else:
             outdf = self.minimal_output()
@@ -504,15 +517,25 @@ def write_doc_file(self):
         with open(doc_fname, 'w') as dfile:
             dfile.write(doc)
 
-    def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax):
+    def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax,
+                         mtr_paytax_base, mtr_inctax_base):
         """
         Write dump output to SQLite3 database table dump.
         """
-        outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax)
-        assert len(outdf.index) == self.calc.array_len
         db_fname = self._output_filename.replace('.csv', '.db')
         dbcon = sqlite3.connect(db_fname)
-        outdf.to_sql('dump', dbcon, if_exists='replace', index=False)
+        # write baseline table
+        outdf = self.dump_output(
+            self.calc_base, dump_varset, mtr_inctax_base, mtr_paytax_base
+        )
+        assert len(outdf.index) == self.calc.array_len
+        outdf.to_sql('baseline', dbcon, if_exists='replace', index=False)
+        # write reform table
+        outdf = self.dump_output(
+            self.calc, dump_varset, mtr_inctax, mtr_paytax
+        )
+        assert len(outdf.index) == self.calc.array_len
+        outdf.to_sql('reform', dbcon, if_exists='replace', index=False)
         dbcon.close()
         del outdf
         gc.collect()
@@ -565,7 +588,7 @@ def write_decile_table(dfx, tfile, tkind='Totals'):
                                               decile_details=False,
                                               pop_quantiles=False,
                                               weight_by_income_measure=False)
-        gdfx = dfx.groupby('table_row', as_index=False)
+        gdfx = dfx.groupby('table_row', as_index=False, observed=True)
         rtns_series = gdfx.apply(unweighted_sum, 's006').values[:, 1]
         xinc_series = gdfx.apply(weighted_sum, 'expanded_income').values[:, 1]
         itax_series = gdfx.apply(weighted_sum, 'iitax').values[:, 1]
@@ -687,7 +710,7 @@ def minimal_output(self):
         odf = pd.DataFrame(data=odict, columns=varlist)
         return odf
 
-    def dump_output(self, dump_varset, mtr_inctax, mtr_paytax):
+    def dump_output(self, calcx, dump_varset, mtr_inctax, mtr_paytax):
         """
         Extract dump output and return it as Pandas DataFrame.
         """
@@ -699,7 +722,7 @@ def dump_output(self, dump_varset, mtr_inctax, mtr_paytax):
         # create and return dump output DataFrame
         odf = pd.DataFrame()
         for varname in varset:
-            vardata = self.calc.array(varname)
+            vardata = calcx.array(varname)
             if varname in recs_vinfo.INTEGER_VARS:
                 odf[varname] = vardata
             else:

diff --git a/taxcalc/tests/conftest.py b/taxcalc/tests/conftest.py
@@ -4,7 +4,6 @@
 import numpy
 import pandas
 import pytest
-
 from pytest_harvest import get_session_results_df
 
 
@@ -143,27 +142,33 @@ def pytest_sessionfinish(session):
     new_stats_df = get_session_results_df(session)
     # move test_id from index into unique column
     new_stats_df.reset_index(inplace=True)
-    old_stats_df = pandas.read_csv(os.path.join(tests_path, 'test_stats_benchmark.csv'))
+    old_stats_df = pandas.read_csv(os.path.join(
+        tests_path, 'test_stats_benchmark.csv')
+    )
 
     merge_df = new_stats_df.merge(old_stats_df, on=['test_id'], how='left')
     # time diff for new tests is set to 0
-    merge_df['time_diff'] = merge_df['duration_ms_x'] - merge_df['duration_ms_y']
+    merge_df['time_diff'] = (
+        merge_df['duration_ms_x'] - merge_df['duration_ms_y']
+    )
     merge_df['time_diff'] = merge_df['time_diff'].fillna(0)
 
     tol = 1.0 # choose tolerance in seconds
     tol *= 1000
 
+    print('\n')
     for ind, row in merge_df.iterrows():
         if row['time_diff'] > tol:
             diff = round(abs(row['time_diff']), 3)
-            print(f"{row['test_id']} is slower than the current benchmark by {diff} ms")
+            print((f"{row['test_id']} is slower than the "
+                   f"current benchmark by {diff} ms"))
 
     print('\n')
-
     for ind, row in merge_df.iterrows():
         if row['time_diff'] < (-1 * tol):
             diff = round(abs(row['time_diff']), 3)
-            print(f"{row['test_id']} is faster than the current benchmark by {diff} ms")
+            print((f"{row['test_id']} is faster than the "
+                   f"current benchmark by {diff} ms"))
 
     # Save new test stats to disk including time diff
     new_stats_df['time_diff'] = merge_df['time_diff'].values