Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise TaxCalcIO logic; remove TMD data files and test #2810

Merged
merged 6 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,3 @@ include taxcalc/policy_current_law.json
include taxcalc/puf_weights.csv.gz
include taxcalc/puf_ratios.csv
include taxcalc/records_variables.json
include taxcalc/tmd_weights.csv.gz
include taxcalc/tmd_growfactors.csv
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ help:
@echo "clean : remove .pyc files and local taxcalc package"
@echo "package : build and install local package"
@echo "pytest-cps : generate report for and cleanup after"
@echo " pytest -m 'not requires_pufcsv and not requires_tmdcsv and not pre_release'"
@echo " pytest -m 'not requires_pufcsv and not pre_release'"
@echo "pytest : generate report for and cleanup after"
@echo " pytest -m 'not pre_release'"
@echo "pytest-all : generate report for and cleanup after"
Expand Down Expand Up @@ -51,7 +51,7 @@ endef
.PHONY=pytest-cps
pytest-cps:
@$(pytest-setup)
@cd taxcalc ; pytest -n4 --disable-warnings --durations=0 --durations-min=2 -m "not requires_pufcsv and not requires_tmdcsv and not pre_release"
@cd taxcalc ; pytest -n4 --disable-warnings --durations=0 --durations-min=2 -m "not requires_pufcsv and not pre_release"
@$(pytest-cleanup)

.PHONY=pytest
Expand Down Expand Up @@ -103,7 +103,7 @@ define coverage-cleanup
rm -f .coverage htmlcov/*
endef

COVMARK = "not requires_pufcsv and not requires_tmdcsv and not pre_release"
COVMARK = "not requires_pufcsv and not pre_release"

OS := $(shell uname -s)

Expand Down
13 changes: 8 additions & 5 deletions docs/usage/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,16 @@ file.

The [tax-microdata
repository](https://github.com/PSLmodels/tax-microdata-benchmarking)
produces an input variables file (`tmd.csv`) and a
`tmd_weights.csv.gz` file that is included in the Tax-Calculator
produces an input variables file (`tmd.csv`), a national weights file
(`tmd_weights.csv.gz`), and a variable growth factors file
(`tmd_growfactors.csv`) that can be used with the Tax-Calculator
package beginning with the 3.6.0 release. The `tmd.csv` file is
available only to Tax-Calculator users who have purchased their own
version of the 2015 IRS-SOI PUF. For those users, the
`Records.tmd_constructor()` method creates a `Records` class object
containing the `tmd` variables and weights.
version of the 2015 IRS-SOI PUF. For those users, all three files
are available from the tax-microdata repository. These three tmd files
can be used with the Tax-Calculator Python API (using the
`Records.tmd_constructor()` static method) or with the Tax-Calculator
CLI tool, `tc`.

## Using other data with Tax-Calculator

Expand Down
1 change: 0 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ testpaths =
taxcalc
markers =
requires_pufcsv
requires_tmdcsv
pre_release
compatible_data
local
Expand Down
3 changes: 0 additions & 3 deletions taxcalc.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ taxcalc/puf_weights.csv.gz
taxcalc/records.py
taxcalc/records_variables.json
taxcalc/taxcalcio.py
taxcalc/tmd_growfactors.csv
taxcalc/tmd_weights.csv.gz
taxcalc/utils.py
taxcalc/utilsprvt.py
taxcalc.egg-info/PKG-INFO
Expand Down Expand Up @@ -214,7 +212,6 @@ taxcalc/tests/test_records.py
taxcalc/tests/test_reforms.py
taxcalc/tests/test_responses.py
taxcalc/tests/test_taxcalcio.py
taxcalc/tests/test_tmdcsv.py
taxcalc/tests/test_utils.py
taxcalc/validation/CSV_INPUT_VARS.md
taxcalc/validation/CSV_OUTPUT_VARS.md
Expand Down
3 changes: 3 additions & 0 deletions taxcalc/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,9 @@ def tmd_constructor(
eliminate the need to specify all the details of the PUF input
data.
"""
assert isinstance(data_path, Path)
assert isinstance(weights_path, Path)
assert isinstance(growfactors_path, Path)
return Records(
data=pd.read_csv(data_path),
start_year=Records.TMDCSV_YEAR,
Expand Down
57 changes: 39 additions & 18 deletions taxcalc/taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
self.puf_input_data = False
self.cps_input_data = False
self.tmd_input_data = False
self.tmd_weights = None
self.tmd_gfactor = None
if isinstance(input_data, str):
# remove any leading directory path from INPUT filename
fname = os.path.basename(input_data)
Expand All @@ -90,6 +92,23 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
if not self.cps_input_data and not os.path.isfile(input_data):
msg = 'INPUT file could not be found'
self.errmsg += 'ERROR: {}\n'.format(msg)
# if tmd_input_data is True, construct weights and gfactor paths
if self.tmd_input_data: # pragma: no cover
tmd_dir = os.path.dirname(input_data)
if 'TMD_AREA' in os.environ:
area = os.environ['TMD_AREA']
wfile = f'{area}_tmd_weights.csv.gz'
inp = f'{fname[:-4]}_{area}-{str(tax_year)[2:]}'
else: # using national weights
wfile = 'tmd_weights.csv.gz'
self.tmd_weights = os.path.join(tmd_dir, wfile)
self.tmd_gfactor = os.path.join(tmd_dir, 'tmd_growfactors.csv')
if not os.path.isfile(self.tmd_weights):
msg = f'weights file {self.tmd_weights} could not be found'
self.errmsg += 'ERROR: {}\n'.format(msg)
if not os.path.isfile(self.tmd_gfactor):
msg = f'gfactor file {self.tmd_gfactor} could not be found'
self.errmsg += 'ERROR: {}\n'.format(msg)
elif isinstance(input_data, pd.DataFrame):
inp = 'df-{}'.format(str(tax_year)[2:])
else:
Expand Down Expand Up @@ -123,7 +142,7 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
elif isinstance(reform, str):
self.specified_reform = True
# split any compound reform into list of simple reforms
refnames = list()
refnames = []
reforms = reform.split('+')
for rfm in reforms:
# remove any leading directory path from rfm filename
Expand Down Expand Up @@ -206,7 +225,7 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
self.calc = None
self.calc_base = None
self.param_dict = None
self.policy_dicts = list()
self.policy_dicts = []

def init(self, input_data, tax_year, baseline, reform, assump,
aging_input_data, exact_calculations):
Expand Down Expand Up @@ -234,7 +253,7 @@ def init(self, input_data, tax_year, baseline, reform, assump,
# get assumption sub-dictionaries
paramdict = Calculator.read_json_param_objects(None, assump)
# get policy parameter dictionaries from --reform file(s)
policydicts = list()
policydicts = []
if self.specified_reform:
reforms = reform.split('+')
for ref in reforms:
Expand All @@ -252,9 +271,7 @@ def init(self, input_data, tax_year, baseline, reform, assump,
self.errmsg += valerr_msg.__str__()
# create GrowFactors base object that incorporates gdiff_baseline
if self.tmd_input_data:
gfactors_base = GrowFactors( # pragma: no cover
Records.TMD_GROWFACTORS_FILENAME
)
gfactors_base = GrowFactors(self.tmd_gfactor) # pragma: no cover
else:
gfactors_base = GrowFactors()
gdiff_baseline.apply_to(gfactors_base)
Expand All @@ -266,9 +283,7 @@ def init(self, input_data, tax_year, baseline, reform, assump,
self.errmsg += valerr_msg.__str__()
# create GrowFactors ref object that has all gdiff objects applied
if self.tmd_input_data:
gfactors_ref = GrowFactors( # pragma: no cover
Records.TMD_GROWFACTORS_FILENAME
)
gfactors_ref = GrowFactors(self.tmd_gfactor) # pragma: no cover
else:
gfactors_ref = GrowFactors()
gdiff_baseline.apply_to(gfactors_ref)
Expand Down Expand Up @@ -333,14 +348,20 @@ def init(self, input_data, tax_year, baseline, reform, assump,
exact_calculations=exact_calculations
)
elif self.tmd_input_data:
recs = Records.tmd_constructor(
data=input_data,
recs = Records(
data=pd.read_csv(input_data),
start_year=Records.TMDCSV_YEAR,
weights=self.tmd_weights,
gfactors=gfactors_ref,
adjust_ratios=None,
exact_calculations=exact_calculations
) # pragma: no cover
recs_base = Records.tmd_constructor(
data=input_data,
recs_base = Records(
data=pd.read_csv(input_data),
start_year=Records.TMDCSV_YEAR,
weights=self.tmd_weights,
gfactors=gfactors_base,
adjust_ratios=None,
exact_calculations=exact_calculations
) # pragma: no cover
else: # if not {cps|tmd}_input_data but aging_input_data
Expand Down Expand Up @@ -541,7 +562,7 @@ def write_doc_file(self):
doc = Calculator.reform_documentation(self.param_dict,
self.policy_dicts[1:])
doc_fname = self._output_filename.replace('.csv', '-doc.text')
with open(doc_fname, 'w') as dfile:
with open(doc_fname, 'w', encoding='utf-8') as dfile:
dfile.write(doc)

def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax,
Expand Down Expand Up @@ -575,7 +596,7 @@ def write_tables_file(self):
tab_fname = self._output_filename.replace('.csv', '-tab.text')
# skip tables if there are not some positive weights
if self.calc_base.total_weight() <= 0.:
with open(tab_fname, 'w') as tfile:
with open(tab_fname, 'w', encoding='utf-8') as tfile:
msg = 'No tables because sum of weights is not positive\n'
tfile.write(msg)
return
Expand All @@ -597,7 +618,7 @@ def write_tables_file(self):
diff = nontax + change # using expanded_income under baseline policy
diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars)
# write each kind of distributional table
with open(tab_fname, 'w') as tfile:
with open(tab_fname, 'w', encoding='utf-8') as tfile:
TaxCalcIO.write_decile_table(distdf, tfile, tkind='Reform Totals')
tfile.write('\n')
TaxCalcIO.write_decile_table(diffdf, tfile, tkind='Differences')
Expand Down Expand Up @@ -730,15 +751,15 @@ def write_empty_graph_file(fname, title, reason):
'<head><title>{}</title></head>\n'
'<body><center<h1>{}</h1></center></body>\n'
'</html>\n').format(title, reason)
with open(fname, 'w') as gfile:
with open(fname, 'w', encoding='utf-8') as gfile:
gfile.write(txt)

def minimal_output(self):
"""
Extract minimal output and return it as Pandas DataFrame.
"""
varlist = ['RECID', 'YEAR', 'WEIGHT', 'INCTAX', 'LSTAX', 'PAYTAX']
odict = dict()
odict = {}
scalc = self.calc
odict['RECID'] = scalc.array('RECID') # id for tax filing unit
odict['YEAR'] = self.tax_year() # tax calculation year
Expand Down
38 changes: 0 additions & 38 deletions taxcalc/tests/test_tmdcsv.py

This file was deleted.

Loading
Loading