Merge pull request #1148 from martinholmer/split-reform-assump

Split policy reform and economic assumption parameters into two separate JSON files
PSLmodels · Jan 24, 2017 · b085c35 · b085c35
2 parents 5440f28 + 3305f62
commit b085c35
Show file tree

Hide file tree

Showing 22 changed files with 532 additions and 359 deletions.
diff --git a/inctax.py b/inctax.py
@@ -46,11 +46,18 @@ def main():
                         action="store_true")
     parser.add_argument('--reform',
                         help=('REFORM is name of optional file that contains '
-                              'tax reform "policy" parameters and "behavior" '
-                              'parameters and "growth" parameters; the '
-                              'REFORM file is specified using JSON that may '
-                              'include //-comments. No --reform implies use '
-                              'of current-law policy.'),
+                              'reform "policy" parameters; the REFORM file '
+                              'is specified using JSON that may include '
+                              '//-comments. No --reform implies use of '
+                              'current-law policy.'),
+                        default=None)
+    parser.add_argument('--assump',
+                        help=('ASSUMP is name of optional file that contains '
+                              'economic assumption parameters ("behavior", '
+                              '"consumption" and "growth" parameters); the '
+                              'ASSUMP file is specified using JSON that may '
+                              'include //-comments. No --assump implies use '
+                              'of static analysis assumptions.'),
                         default=None)
     parser.add_argument('--exact',
                         help=('optional flag to suppress smoothing in income '
@@ -150,10 +157,16 @@ def main():
         sys.stderr.write('ERROR: must specify TAXYEAR >= 2013;\n')
         sys.stderr.write('USAGE: python inctax.py --help\n')
         return 1
+    # check consistency of REFORM and ASSUMP options
+    if args.assump and not args.reform:
+        sys.stderr.write('ERROR: cannot specify ASSUMP without a REFORM\n')
+        sys.stderr.write('USAGE: python inctax.py --help\n')
+        return 1
     # instantiate IncomeTaxIO object and do federal income tax calculations
     inctax = IncomeTaxIO(input_data=args.INPUT,
                          tax_year=args.TAXYEAR,
                          reform=args.reform,
+                         assump=args.assump,
                          exact_calculations=args.exact,
                          blowup_input_data=args.blowup,
                          output_weights=args.weights,

diff --git a/taxcalc/calculate.py b/taxcalc/calculate.py
@@ -398,53 +398,66 @@ def current_law_version(self):
         return calc
 
     @staticmethod
-    def read_json_reform_file(reform_filename):
+    def read_json_param_files(reform_filename, assump_filename):
         """
-        Read JSON reform file and call Calculator.read_json_reform_text method.
+        Read JSON files and call Calculator.read_json_*_text methods.
         """
-        if os.path.isfile(reform_filename):
+        if reform_filename is None:
+            rpol_dict = dict()
+        elif os.path.isfile(reform_filename):
             txt = open(reform_filename, 'r').read()
-            return Calculator.read_json_reform_text(txt)
+            rpol_dict = Calculator.read_json_policy_reform_text(txt)
         else:
-            msg = 'reform file {} could not be found'
+            msg = 'policy reform file {} could not be found'
             raise ValueError(msg.format(reform_filename))
+        if assump_filename is None:
+            behv_dict = dict()
+            cons_dict = dict()
+            grow_dict = dict()
+        elif os.path.isfile(assump_filename):
+            txt = open(assump_filename, 'r').read()
+            (behv_dict,
+             cons_dict,
+             grow_dict) = Calculator.read_json_econ_assump_text(txt)
+        else:
+            msg = 'economic assumption file {} could not be found'
+            raise ValueError(msg.format(assump_filename))
+        return (rpol_dict, behv_dict, cons_dict, grow_dict)
+
+    REQUIRED_REFORM_KEYS = set(['policy'])
+    REQUIRED_ASSUMP_KEYS = set(['behavior', 'consumption', 'growth'])
 
     @staticmethod
-    def read_json_reform_text(text_string):
+    def read_json_policy_reform_text(text_string):
         """
-        Strip //-comments from text_string and return 4 dict based on the JSON.
-        The reform text is JSON with four high-level string:object pairs:
-           a "policy": {...} pair,
-           a "behavior": {...} pair,
-           a "growth": {...} pair, and
-           a "consumption": {...} pair.
-           In all four cases the {...} object may be empty (that is, be {}),
-           or may contain one or more pairs with parameter string primary keys
-           and string years as secondary keys.  See tests/test_calculate.py for
-           an extended example of a commented JSON reform text that can be read
-           by this method.  Note that parameter code in the policy object is
-           enclosed inside a pair of double pipe characters (||) as shown
-           in the REFORM_CONTENTS string in the tests/test_calculate.py file.
-        Returned dictionaries (reform_policy, reform_behavior,
-                               reform_growth reform_consumption)
-           have integer years as primary keys
+        Strip //-comments from text_string and return 1 dict based on the JSON.
+        Specified text is JSON with at least 1 high-level string:object pair:
+          a "policy": {...} pair.
+          Other high-level pairs will be ignored by this method, except that
+          a "behavior", "consumption" or "growth" key will raise a ValueError.
+          The {...}  object may be empty (that is, be {}), or
+          may contain one or more pairs with parameter string primary keys
+          and string years as secondary keys.  See tests/test_calculate.py for
+          an extended example of a commented JSON policy reform text
+          that can be read by this method.
+        Note that parameter code in the policy object is enclosed inside a
+          pair of double pipe characters (||) as shown in the REFORM_CONTENTS
+          string in the tests/test_calculate.py file.
+        Returned dictionary rpol_dict
+           has integer years as primary keys
            and string parameters as secondary keys.
-        The returned dictionaries are suitable as the argument to
-           the Policy implement_reform(reform_policy) method, or
-           the Behavior update_behavior(reform_behavior) method, or
-           the Growth update_growth(reform_growth) method, or
-           the Consumption update_consumption(reform_consumption) method.
+        The returned dictionary is suitable as the argument to
+           the Policy implement_reform(rpol_dict) method.
         """
+        # define function used by re.sub to process parameter code
+        def repl_func(mat):
+            code = mat.group(2).replace('\r', '\\r').replace('\n', '\\n')
+            return '"' + code + '"'
         # strip out //-comments without changing line numbers
         json_without_comments = re.sub('//.*', ' ', text_string)
         # convert multi-line string between pairs of || into a simple string
-
-        def repl(mat):
-            code = mat.group(2).replace('\r', '\\r').replace('\n', '\\n')
-            return '"' + code + '"'
-
         json_str = re.sub('(\|\|)(.*?)(\|\|)',  # pylint: disable=W1401
-                          repl, json_without_comments, flags=re.DOTALL)
+                          repl_func, json_without_comments, flags=re.DOTALL)
         # convert JSON text into a Python dictionary
         try:
             raw_dict = json.loads(json_str)
@@ -462,12 +475,16 @@ def repl(mat):
                 msg += '{:02d}{}'.format(linenum, line) + '\n'
             msg += bline + '\n'
             raise ValueError(msg)
-        # check contents of dictionary
-        expect_keys = set(['policy', 'behavior', 'growth', 'consumption'])
-        actual_keys = set(raw_dict.keys())
-        if actual_keys != expect_keys:
-            msg = 'reform keys {} not equal to {}'
-            raise ValueError(msg.format(actual_keys, expect_keys))
+        # check key contents of dictionary
+        actual_keys = raw_dict.keys()
+        for rkey in Calculator.REQUIRED_REFORM_KEYS:
+            if rkey not in actual_keys:
+                msg = 'key "{}" is not in policy reform file'
+                raise ValueError(msg.format(rkey))
+        for rkey in actual_keys:
+            if rkey in Calculator.REQUIRED_ASSUMP_KEYS:
+                msg = 'key "{}" should be in economic assumption file'
+                raise ValueError(msg.format(rkey))
         # handle special param_code key in raw_dict policy component dictionary
         paramcode = raw_dict['policy'].pop('param_code', None)
         if paramcode:
@@ -476,34 +493,91 @@ def repl(mat):
                 raise ValueError(msg)
             for param, code in paramcode.items():
                 raw_dict['policy'][param] = {'0': code}
-        # convert raw_dict component dictionaries
-        pol_dict = Calculator.convert_reform_dict(raw_dict['policy'])
-        beh_dict = Calculator.convert_reform_dict(raw_dict['behavior'])
-        gro_dict = Calculator.convert_reform_dict(raw_dict['growth'])
-        con_dict = Calculator.convert_reform_dict(raw_dict['consumption'])
-        return (pol_dict, beh_dict, gro_dict, con_dict)
+        # convert the policy dictionary in raw_dict
+        rpol_dict = Calculator.convert_parameter_dict(raw_dict['policy'])
+        return rpol_dict
+
+    @staticmethod
+    def read_json_econ_assump_text(text_string):
+        """
+        Strip //-comments from text_string and return 3 dict based on the JSON.
+        Specified text is JSON with at least 3 high-level string:object pairs:
+          a "behavior": {...} pair,
+          a "consumption": {...} pair, and
+          a "growth": {...} pair.
+          Other high-level pairs will be ignored by this method, except that
+          a "policy" key will raise a ValueError.
+          The {...}  object may be empty (that is, be {}), or
+          may contain one or more pairs with parameter string primary keys
+          and string years as secondary keys.  See tests/test_calculate.py for
+          an extended example of a commented JSON economic assumption text
+          that can be read by this method.
+        Note that an example is shown in the ASSUMP_CONTENTS string in
+          tests/test_calculate.py file.
+        Returned dictionaries (behv_dict, cons_dict, grow_dict)
+           have integer years as primary keys
+           and string parameters as secondary keys.
+        The returned dictionaries are suitable as the arguments to
+           the Behavior update_behavior(behv_dict) method, or
+           the Consumption update_consumption(cons_dict) method, or
+           the Growth update_growth(grow_dict) method.
+        """
+        # strip out //-comments without changing line numbers
+        json_str = re.sub('//.*', ' ', text_string)
+        # convert JSON text into a Python dictionary
+        try:
+            raw_dict = json.loads(json_str)
+        except ValueError as valerr:
+            msg = 'Economic assumption text below contains invalid JSON:\n'
+            msg += str(valerr) + '\n'
+            msg += 'Above location of the first error may be approximate.\n'
+            msg += 'The invalid JSON asssump text is between the lines:\n'
+            bline = 'XX----.----1----.----2----.----3----.----4'
+            bline += '----.----5----.----6----.----7'
+            msg += bline + '\n'
+            linenum = 0
+            for line in json_str.split('\n'):
+                linenum += 1
+                msg += '{:02d}{}'.format(linenum, line) + '\n'
+            msg += bline + '\n'
+            raise ValueError(msg)
+        # check key contents of dictionary
+        actual_keys = raw_dict.keys()
+        for rkey in Calculator.REQUIRED_ASSUMP_KEYS:
+            if rkey not in actual_keys:
+                msg = 'key "{}" is not in economic assumption file'
+                raise ValueError(msg.format(rkey))
+        for rkey in actual_keys:
+            if rkey in Calculator.REQUIRED_REFORM_KEYS:
+                msg = 'key "{}" should be in policy reform file'
+                raise ValueError(msg.format(rkey))
+        # convert the assumption dictionaries in raw_dict
+        behv_dict = Calculator.convert_parameter_dict(raw_dict['behavior'])
+        cons_dict = Calculator.convert_parameter_dict(raw_dict['consumption'])
+        grow_dict = Calculator.convert_parameter_dict(raw_dict['growth'])
+        return (behv_dict, cons_dict, grow_dict)
 
     @staticmethod
-    def convert_reform_dict(param_key_dict):
+    def convert_parameter_dict(param_key_dict):
         """
         Converts specified param_key_dict into a dictionary whose primary
           keys are calendar years, and hence, is suitable as the argument to
           the Policy implement_reform(reform_policy) method, or
           the Behavior update_behavior(reform_behavior) method, or
-          the Growth update_growth(reform_growth) method, or
-          the Consumption update_consumption(reform_consumption) method.
+          the Consumption update_consumption(reform_consumption) method, or
+          the Growth update_growth(reform_growth) method.
         Specified input dictionary has string parameter primary keys and
            string years as secondary keys.
         Returned dictionary has integer years as primary keys and
            string parameters as secondary keys.
         """
         # convert year skey strings to integers and lists into np.arrays
-        reform_pkey_param = {}
+        year_param = dict()
         for pkey, sdict in param_key_dict.items():
             if not isinstance(pkey, six.string_types):
                 msg = 'pkey {} in reform is not a string'
                 raise ValueError(msg.format(pkey))
-            rdict = {}
+            rdict = dict()
             if not isinstance(sdict, dict):
                 msg = 'pkey {} in reform is not paired with a dict'
                 raise ValueError(msg.format(pkey))
@@ -515,14 +589,14 @@ def convert_reform_dict(param_key_dict):
                     year = int(skey)
                 rdict[year] = (np.array(val)
                                if isinstance(val, list) else val)
-            reform_pkey_param[pkey] = rdict
-        # convert reform_pkey_param dictionary to reform_pkey_year dictionary
+            year_param[pkey] = rdict
+        # convert year_param dictionary to year_key_dict dictionary
+        year_key_dict = dict()
         years = set()
-        reform_pkey_year = dict()
-        for param, sdict in reform_pkey_param.items():
+        for param, sdict in year_param.items():
             for year, val in sdict.items():
                 if year not in years:
                     years.add(year)
-                    reform_pkey_year[year] = {}
-                reform_pkey_year[year][param] = val
-        return reform_pkey_year
+                    year_key_dict[year] = dict()
+                year_key_dict[year][param] = val
+        return year_key_dict
diff --git a/taxcalc/incometaxio.py b/taxcalc/incometaxio.py
@@ -47,6 +47,11 @@ class IncomeTaxIO(object):
         string is name of optional REFORM file, or
         dictionary suitable for passing to Policy.implement_reform() method.
 
+    assump: None or string
+        None implies economic assumptions are baseline and static analysis
+        of reform is conducted, or
+        string is name of optional ASSUMP file.
+
     exact_calculations: boolean
         specifies whether or not exact tax calculations are done without
         any smoothing of "stair-step" provisions in income tax law.
@@ -72,6 +77,7 @@ class IncomeTaxIO(object):
     ValueError:
         if file specified by input_data string does not exist.
         if reform is neither None, string, nor dictionary.
+        if assump is neither None nor string.
         if tax_year before Policy start_year.
         if tax_year after Policy end_year.
 
@@ -80,7 +86,7 @@ class IncomeTaxIO(object):
     class instance: IncomeTaxIO
     """
 
-    def __init__(self, input_data, tax_year, reform,
+    def __init__(self, input_data, tax_year, reform, assump,
                  exact_calculations,
                  blowup_input_data, output_weights,
                  output_records, csv_dump):
@@ -106,6 +112,18 @@ def __init__(self, input_data, tax_year, reform,
             msg = 'INPUT is neither string nor Pandas DataFrame'
             raise ValueError(msg)
         # construct output_filename and delete old output file if it exists
+        if assump is None:
+            asm = ''
+            self._assump = False
+        elif isinstance(assump, six.string_types):
+            if assump.endswith('.json'):
+                asm = '-{}'.format(assump[:-5])
+            else:
+                asm = '-{}'.format(assump)
+            self._assump = True
+        else:
+            msg = 'IncomeTaxIO.ctor assump is neither None nor str'
+            raise ValueError(msg)
         if reform is None:
             ref = ''
             self._reform = False
@@ -125,11 +143,11 @@ def __init__(self, input_data, tax_year, reform,
                 msg = 'IncomeTaxIO.ctor reform is neither None, str, nor dict'
                 raise ValueError(msg)
         if output_records:
-            self._output_filename = '{}.records{}'.format(inp, ref)
+            self._output_filename = '{}.records{}{}'.format(inp, ref, asm)
         elif csv_dump:
-            self._output_filename = '{}.csvdump{}'.format(inp, ref)
+            self._output_filename = '{}.csvdump{}{}'.format(inp, ref, asm)
         else:
-            self._output_filename = '{}.out-inctax{}'.format(inp, ref)
+            self._output_filename = '{}.out-inctax{}{}'.format(inp, ref, asm)
         if os.path.isfile(self._output_filename):
             os.remove(self._output_filename)
         # check for existence of INPUT file
@@ -146,14 +164,21 @@ def __init__(self, input_data, tax_year, reform,
         if tax_year > pol.end_year:
             msg = 'tax_year {} greater than policy.end_year {}'
             raise ValueError(msg.format(tax_year, pol.end_year))
-        # implement reform if one is specified
+        # implement reform and assump if specified
+        ref_d = dict()
+        beh_d = dict()
+        con_d = dict()
+        gro_d = dict()
         if self._reform:
             if self._using_reform_file:
-                (r_pol, r_beh,
-                 r_gro, r_con) = Calculator.read_json_reform_file(reform)
+                (ref_d, beh_d, con_d,
+                 gro_d) = Calculator.read_json_param_files(reform, assump)
             else:
-                r_pol = reform
-            pol.implement_reform(r_pol)
+                ref_d = reform
+                beh_d = dict()
+                con_d = dict()
+                gro_d = dict()
+            pol.implement_reform(ref_d)
         # set tax policy parameters to specified tax_year
         pol.set_year(tax_year)
         # read input file contents into Records object
@@ -183,16 +208,16 @@ def __init__(self, input_data, tax_year, reform,
             clp.set_year(tax_year)
             recs_clp = copy.deepcopy(recs)
             con = Consumption()
-            con.update_consumption(r_con)
+            con.update_consumption(con_d)
             gro = Growth()
-            gro.update_growth(r_gro)
+            gro.update_growth(gro_d)
             self._calc_clp = Calculator(policy=clp, records=recs_clp,
                                         verbose=False,
                                         consumption=con,
                                         growth=gro,
                                         sync_years=blowup_input_data)
             beh = Behavior()
-            beh.update_behavior(r_beh)
+            beh.update_behavior(beh_d)
             self._calc = Calculator(policy=pol, records=recs,
                                     verbose=True,
                                     behavior=beh,