Skip to content

Commit

Permalink
Reorganize taxcalc validation code
Browse files Browse the repository at this point in the history
  • Loading branch information
martinholmer committed Mar 14, 2018
1 parent 862f8c2 commit 0f1c1c7
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 147 deletions.
2 changes: 1 addition & 1 deletion TESTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ against the PEP8 standard. Do the checks this way:

```
cd taxcalc
pep8 --ignore=E402 .
pep8 .
```

If no messages are printed, the PEP8 tests pass. Once you get that result,
Expand Down
25 changes: 13 additions & 12 deletions taxcalc/validation/csv_input.py → puf_fuzz.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Tax-Calculator validation script that adds random amounts to most
Tax-Calculator Python script that adds random amounts to most
variables in the puf.csv input file, which must be located in the
top-level directory of the Tax-Calculator source code tree.
The resulting input file is xYY.csv, where YY denotes the tax year.
Expand All @@ -9,19 +9,17 @@
generated by the standard puf.csv input file.
"""
# CODING-STYLE CHECKS:
# pep8 --ignore=E402 csv_input.py
# pylint --disable=locally-disabled csv_input.py
# pep8 --ignore=E402 puf_fuzz.py
# pylint --disable=locally-disabled puf_fuzz.py

import argparse
import sys
import os
import numpy as np
import pandas as pd
CUR_PATH = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(CUR_PATH, '..', '..'))
# pylint: disable=import-error,wrong-import-position
from taxcalc import Records


# specify maximum allowed values for command-line parameters
MAX_YEAR = 2023 # maximum tax year allowed for tax calculations
MAX_SEED = 999999999 # maximum allowed seed for random-number generator
Expand All @@ -40,11 +38,13 @@
'e09700', 'e09800', 'e09900', 'e11200'])

# specify set of variables whose values are not to be randomized
Records.read_var_info()
if DEBUG:
SKIP_VARS = Records.USABLE_READ_VARS
else:
SKIP_VARS = set(['RECID', 'MARS', 'DSI', 'MIDR', 'FLPDYR',
'age_head', 'age_spouse',
'nu18', 'n1820', 'n21',
'XTOT', 'EIC', 'n24', 'f2441',
'f6251'])

Expand Down Expand Up @@ -131,7 +131,8 @@ def main(taxyear, rnseed, ssize):
Contains high-level logic of the script.
"""
# read puf.csv file into a Pandas DataFrame
pufcsv_filename = os.path.join(CUR_PATH, '..', '..', 'puf.csv')
current_path = os.path.abspath(os.path.dirname(__file__))
pufcsv_filename = os.path.join(current_path, '..', '..', 'puf.csv')
if not os.path.isfile(pufcsv_filename):
msg = 'ERROR: puf.csv file not found in top-level directory'
sys.stderr.write(msg + '\n')
Expand Down Expand Up @@ -182,17 +183,17 @@ def main(taxyear, rnseed, ssize):
if __name__ == '__main__':
# parse command-line arguments:
PARSER = argparse.ArgumentParser(
prog='python csv_input.py',
prog='python puf_fuzz.py',
description=('Adds random amounts to certain variables in '
'puf.csv input file and writes the randomized '
'CSV-formatted input file to xYY.csv file.'))
PARSER.add_argument('YEAR', nargs='?', type=int, default=0,
PARSER.add_argument('YEAR', type=int, default=0,
help=('YEAR is tax year; '
'must be in [2013,{}] range.'.format(MAX_YEAR)))
PARSER.add_argument('SEED', nargs='?', type=int, default=0,
PARSER.add_argument('SEED', type=int, default=0,
help=('SEED is random-number seed; '
'must be in [1,{}] range.'.format(MAX_SEED)))
PARSER.add_argument('SIZE', nargs='?', type=int, default=0,
PARSER.add_argument('SIZE', type=int, default=0,
help=('SIZE is sample size; '
'must be in [1,{}] range.'.format(MAX_SIZE)))
ARGS = PARSER.parse_args()
Expand All @@ -211,7 +212,7 @@ def main(taxyear, rnseed, ssize):
sys.stderr.write('ERROR: SIZE {} not in {}\n'.format(ARGS.SIZE, RSTR))
ARGS_ERROR = True
if ARGS_ERROR:
sys.stderr.write('USAGE: python csv_input.py --help\n')
sys.stderr.write('USAGE: python puf_fuzz.py --help\n')
RCODE = 1
else:
RCODE = main(ARGS.YEAR, ARGS.SEED, ARGS.SIZE)
Expand Down
8 changes: 6 additions & 2 deletions taxcalc/validation/taxsim/simtax.py → simtax.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""
SIMple input-output capabilities for TAX-calculator.
SIMple input-output capabilities for TAX-calculator used in validation work
"""
# CODING-STYLE CHECKS:
# pep8 --ignore=E402 simtax.py
# pylint --disable=locally-disabled simtax.py

import argparse
import sys
from simpletaxio import SimpleTaxIO
import os
import re
import six
import pandas as pd
from taxcalc import SimpleTaxIO


def main():
Expand Down
1 change: 1 addition & 0 deletions taxcalc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from taxcalc.growfactors import *
from taxcalc.growdiff import *
from taxcalc.records import *
from taxcalc.simpletaxio import *
from taxcalc.taxcalcio import *
from taxcalc.utils import *
from taxcalc.macro_elasticity import *
Expand Down
26 changes: 26 additions & 0 deletions taxcalc/csv_show.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# Prints every non-zero column value of the first CSV row whose RECID
# column matches the requested id, one "COLUMN_NUMBER COLUMN_NAME VALUE"
# triple per line.  The first line of the CSV file must be a header row
# that contains a column named RECID.
# Exit status: 1 on a usage error or when the header has no RECID column.
if [[ "$#" -ne 2 ]]; then
    echo "csvshow prints all non-zero CSV file column values for RECID"
    echo "ERROR: must specify exactly two command-line arguments"
    echo "USAGE: csvshow FILENAME RECID"
    exit 1
fi
# Both arguments are quoted so values containing whitespace are not split.
awk -F, -v id="$2" '
NR == 1 {
    # header row: remember each column name and locate the RECID column
    recid_varnum = 0
    for ( i = 1; i <= NF; i++ ) {
        varname[i] = $i
        if ( $i == "RECID" ) recid_varnum = i
    }
    if ( recid_varnum == 0 ) {
        # with recid_varnum equal to zero the rule below would compare
        # the whole line ($0) with id, so fail loudly instead
        print "ERROR: no RECID column in CSV file header" > "/dev/stderr"
        exit 1
    }
    next  # the header row is never a data row
}
$recid_varnum == id {
    for ( i = 1; i <= NF; i++ ) {
        if ( $i != 0 ) {
            print i, varname[i], $i
        }
    }
    exit  # only the first matching row is shown
}
' "$1"
13 changes: 13 additions & 0 deletions taxcalc/csv_vars.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Prints the 1-based column number and the name of every column in a CSV
# file, one "NUMBER NAME" pair per line, using only the header (first) row.
# Exit status: 1 when the number of command-line arguments is not one.
if [[ "$#" -ne 1 ]]; then
    echo "csvvars prints all CSV file column numbers and names"
    echo "ERROR: number of command-line arguments not equal to one"
    echo "USAGE: csvvars FILENAME"
    exit 1
fi
# The file name is quoted so paths containing whitespace are not split.
awk -F, '
NR == 1 {
    for ( i = 1; i <= NF; i++ ) {
        print i, $i
    }
    exit  # only the header row is needed, so skip the rest of the file
}
' "$1"
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
import re
import six
import pandas as pd
CUR_PATH = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(CUR_PATH, '..', '..', '..'))
# pylint: disable=import-error,wrong-import-position
from taxcalc import Policy, Records, Calculator
from taxcalc.policy import Policy
from taxcalc.records import Records
from taxcalc.calculate import Calculator


class SimpleTaxIO(object):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os
import tempfile
import pytest
from simpletaxio import SimpleTaxIO
from taxcalc import SimpleTaxIO # pylint: disable=import-error


NUM_INPUT_LINES = 4
Expand Down
125 changes: 0 additions & 125 deletions taxcalc/validation/csv_extract.py

This file was deleted.

4 changes: 2 additions & 2 deletions taxcalc/validation/taxsim/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ YY=${LYY:1:2}
tclsh taxsim_in.tcl 20$YY $L > $LYY.in
# Generate simtax.py OUTPUT for specified INPUT and REFORM
if [[ "$REFORM" == "." ]] ; then
python simtax.py --taxsim2441 $LYY.in
python ../../../simtax.py --taxsim2441 $LYY.in
SUFFIX=""
OVAR4=""
else
RJSON="reform-$REFORM.json"
python simtax.py --taxsim2441 --reform $RJSON $LYY.in
python ../../../simtax.py --taxsim2441 --reform $RJSON $LYY.in
SUFFIX="-reform-$REFORM"
OVAR4="--ovar4"
fi
Expand Down

0 comments on commit 0f1c1c7

Please sign in to comment.