From e4ac4d30d3b4f2a3a6f1cc69cab1b399f3718879 Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Sat, 18 May 2024 14:57:29 -0400
Subject: [PATCH 01/12] add HSV functions to model

---
 ogcore/txfunc.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
index ac0990b1d..1b208a6f4 100644
--- a/ogcore/txfunc.py
+++ b/ogcore/txfunc.py
@@ -106,6 +106,15 @@ def get_tax_rates(
                     * ((income**-phi1) + phi2) ** ((-1 - phi1) / phi1)
                 )
             )
+    if tax_func_type == "HSV":
+        lambda_s, tau_s = (
+            np.squeeze(params[..., 0]),
+            np.squeeze(params[..., 1]),
+        )
+        if rate_type == "etr":
+            txrates = 1 - (lambda_s * (income ** (-tau_s)))
+        else:  # marginal tax rate function
+            txrates = 1 - (lambda_s * (1 - tau_s) * (income ** (-tau_s)))
     elif tax_func_type == "DEP":
         (
             A,
@@ -774,6 +783,29 @@ def txfunc_est(
         params = np.zeros(numparams)
         params[:3] = np.array([phi0til, phi1til, phi2til])
         params_to_plot = params
+    elif tax_func_type == "HSV":
+        # '''
+        # Estimate Heathcote, Storesletten, Violante (2017) parameters via
+        # OLS
+        # '''
+        constant = np.ones_like(income)
+        ln_income = np.log(income)
+        X = np.column_stack((constant, ln_income))
+        Y = 1 - txrates
+        param_est = np.linalg.inv(X.T @ X) @ X.T @ Y
+        params = np.zeros(numparams)
+        if rate_type == "etr":
+            ln_lambda_s_hat, minus_tau_s_hat = param_est
+            params[:2] = np.array([np.exp(ln_lambda_s_hat), -minus_tau_s_hat])
+        else:
+            constant, minus_tau_s_hat = param_est
+            lambda_s_hat = np.exp(constant - np.log(1 + minus_tau_s_hat))
+            params[:2] = np.array([lambda_s_hat, -minus_tau_s_hat])
+        # Calculate the WSSE
+        Y_hat = X @ params
+        wsse = ((Y - Y_hat) ** 2 * wgts).sum()
+        obs = df.shape[0]
+        params_to_plot = params
     elif tax_func_type == "linear":
         # '''
         # For linear rates, just take the mean ETR or MTR by age-year.
@@ -1438,6 +1470,7 @@ def tax_func_estimate(
         "DEP": 12,
         "DEP_totalinc": 6,
         "GS": 3,
+        "HSV": 2,
         "linear": 1,
         "mono": 1,
         "mono2D": 1,

From e4f7775ee7248351df06e525399486e9533609af Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Sat, 18 May 2024 15:06:14 -0400
Subject: [PATCH 02/12] make log of net of tax rate

---
 ogcore/txfunc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
index 1b208a6f4..3fc293c2b 100644
--- a/ogcore/txfunc.py
+++ b/ogcore/txfunc.py
@@ -791,7 +791,7 @@ def txfunc_est(
         constant = np.ones_like(income)
         ln_income = np.log(income)
         X = np.column_stack((constant, ln_income))
-        Y = 1 - txrates
+        Y = np.log(1 - txrates)
         param_est = np.linalg.inv(X.T @ X) @ X.T @ Y
         params = np.zeros(numparams)
         if rate_type == "etr":

From 7ffda26f6de2a88caf4975cf3ae8ec0f35133300 Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Sun, 19 May 2024 20:53:36 -0400
Subject: [PATCH 03/12] initial value from global optimizer

---
 ogcore/txfunc.py | 301 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 229 insertions(+), 72 deletions(-)

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
index 3fc293c2b..1e7ad0953 100644
--- a/ogcore/txfunc.py
+++ b/ogcore/txfunc.py
@@ -568,7 +568,8 @@ def replace_outliers(param_list, sse_big_mat):
 
 
 def txfunc_est(
-    df, s, t, rate_type, tax_func_type, numparams, output_dir, graph
+    df, s, t, rate_type, tax_func_type, numparams, output_dir, graph,
+    params_init=None, global_opt=False
 ):
     """
     This function uses tax tax rate and income data for individuals of a
@@ -629,31 +630,33 @@ def txfunc_est(
         # Estimate DeBacker, Evans, Phillips (2018) ratio of polynomial
         # tax functions.
         # '''
-        Atil_init = 1.0
-        Btil_init = 1.0
-        Ctil_init = 1.0
-        Dtil_init = 1.0
-        max_x_init = np.minimum(
-            txrates[(df["total_capinc"] < y_20pctl)].max(), MAX_ETR + 0.05
-        )
-        max_y_init = np.minimum(
-            txrates[(df["total_labinc"] < x_20pctl)].max(), MAX_ETR + 0.05
-        )
-        shift = txrates[
-            (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
-        ].min()
-        share_init = 0.5
-        params_init = np.array(
-            [
-                Atil_init,
-                Btil_init,
-                Ctil_init,
-                Dtil_init,
-                max_x_init,
-                max_y_init,
-                share_init,
-            ]
-        )
+        # if Atil_init not exist, set to 1.0
+        if params_init is None:
+            Atil_init = 1.0
+            Btil_init = 1.0
+            Ctil_init = 1.0
+            Dtil_init = 1.0
+            max_x_init = np.minimum(
+                txrates[(df["total_capinc"] < y_20pctl)].max(), MAX_ETR + 0.05
+            )
+            max_y_init = np.minimum(
+                txrates[(df["total_labinc"] < x_20pctl)].max(), MAX_ETR + 0.05
+            )
+            shift = txrates[
+                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
+            ].min()
+            share_init = 0.5
+            params_init = np.array(
+                [
+                    Atil_init,
+                    Btil_init,
+                    Ctil_init,
+                    Dtil_init,
+                    max_x_init,
+                    max_y_init,
+                    share_init,
+                ]
+            )
         shift_x = 0.0  # temp value
         shift_y = 0.0  # temp value
         tx_objs = (
@@ -676,14 +679,22 @@ def txfunc_est(
             (lb_max_y, MAX_ETR + 0.15),
             (0, 1),
         )
-        params_til = opt.minimize(
-            wsumsq,
-            params_init,
-            args=(tx_objs),
-            method="L-BFGS-B",
-            bounds=bnds,
-            tol=1e-15,
-        )
+        if global_opt:
+            params_til = opt.differential_evolution(
+                wsumsq,
+                bounds=bnds,
+                args=(tx_objs),
+                seed=1
+                )
+        else:
+            params_til = opt.minimize(
+                wsumsq,
+                params_init,
+                args=(tx_objs),
+                method="L-BFGS-B",
+                bounds=bnds,
+                tol=1e-15,
+            )
         Atil, Btil, Ctil, Dtil, max_x, max_y, share = params_til.x
         # message = ("(max_x, min_x)=(" + str(max_x) + ", " + str(min_x) +
         #     "), (max_y, min_y)=(" + str(max_y) + ", " + str(min_y) + ")")
@@ -700,27 +711,40 @@ def txfunc_est(
             [max_x, max_y, share, min_x, min_y, shift_x, shift_y, shift]
         )
         params_to_plot = params
+        # set initial values to parameter estimates
+        params_init = np.array(
+                [
+                    Atil,
+                    Btil,
+                    Ctil,
+                    Dtil,
+                    max_x,
+                    max_y,
+                    share,
+                ]
+            )
     elif tax_func_type == "DEP_totalinc":
         # '''
         # Estimate DeBacker, Evans, Phillips (2018) ratio of polynomial
         # tax functions as a function of total income.
         # '''
-        Atil_init = 1.0
-        Btil_init = 1.0
-        max_x_init = np.minimum(
-            txrates[(df["total_capinc"] < y_20pctl)].max(), MAX_ETR + 0.05
-        )
-        max_y_init = np.minimum(
-            txrates[(df["total_labinc"] < x_20pctl)].max(), MAX_ETR + 0.05
-        )
-        max_income_init = max(max_x_init, max_y_init)
-        min_income = min(min_x, min_y)
-        shift = txrates[
-            (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
-        ].min()
-        share_init = 0.5
-        shift_inc = 0.0  # temp value
-        params_init = np.array([Atil_init, Btil_init, max_income_init])
+        if params_init is None:
+            Atil_init = 1.0
+            Btil_init = 1.0
+            max_x_init = np.minimum(
+                txrates[(df["total_capinc"] < y_20pctl)].max(), MAX_ETR + 0.05
+            )
+            max_y_init = np.minimum(
+                txrates[(df["total_labinc"] < x_20pctl)].max(), MAX_ETR + 0.05
+            )
+            max_income_init = max(max_x_init, max_y_init)
+            min_income = min(min_x, min_y)
+            shift = txrates[
+                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
+            ].min()
+            share_init = 0.5
+            shift_inc = 0.0  # temp value
+            params_init = np.array([Atil_init, Btil_init, max_income_init])
         tx_objs = (
             np.array([min_income, shift_inc, shift]),
             X,
@@ -732,14 +756,22 @@ def txfunc_est(
         )
         lb_max_income = np.maximum(min_income, 0.0) + 1e-4
         bnds = ((1e-12, None), (1e-12, None), (lb_max_income, MAX_ETR + 0.15))
-        params_til = opt.minimize(
-            wsumsq,
-            params_init,
-            args=(tx_objs),
-            method="L-BFGS-B",
-            bounds=bnds,
-            tol=1e-15,
-        )
+        if global_opt:
+            params_til = opt.differential_evolution(
+                wsumsq,
+                bounds=bnds,
+                args=(tx_objs),
+                seed=1
+                )
+        else:
+            params_til = opt.minimize(
+                wsumsq,
+                params_init,
+                args=(tx_objs),
+                method="L-BFGS-B",
+                bounds=bnds,
+                tol=1e-15,
+            )
         Atil, Btil, max_income = params_til.x
         wsse = params_til.fun
         obs = df.shape[0]
@@ -750,15 +782,19 @@ def txfunc_est(
         params[:2] = np.array([Atil, Btil]) / np.array([income2bar, Ibar])
         params[2:] = np.array([max_income, min_income, shift_income, shift])
         params_to_plot = params
+        # set initial values to parameter estimates
+        params_init = np.array([Atil, Btil, max_income])
     elif tax_func_type == "GS":
         # '''
         # Estimate Gouveia-Strauss parameters via least squares.
         # Need to use a different functional form than for DEP function.
         # '''
-        phi0_init = 1.0
-        phi1_init = 1.0
-        phi2_init = 1.0
-        params_init = np.array([phi0_init, phi1_init, phi2_init])
+        if params_init is None:
+            phi0_init = 1.0
+            phi1_init = 1.0
+            phi2_init = 1.0
+            params_init = np.array([phi0_init, phi1_init, phi2_init])
+        print("Initial phi0, phi1, phi2: ", params_init)
         tx_objs = (
             np.array([None]),
             X,
@@ -768,21 +804,32 @@ def txfunc_est(
             tax_func_type,
             rate_type,
         )
-        bnds = ((1e-12, None), (1e-12, None), (1e-12, None))
-        params_til = opt.minimize(
-            wsumsq,
-            params_init,
-            args=(tx_objs),
-            method="L-BFGS-B",
-            bounds=bnds,
-            tol=1e-15,
-        )
+        # bnds = ((1e-12, None), (1e-12, None), (1e-12, None))
+        bnds = ((1e-12, 9999), (1e-12, 9999), (1e-12, 9999))
+        if global_opt:
+            params_til = opt.differential_evolution(
+                wsumsq,
+                bounds=bnds,
+                args=(tx_objs),
+                seed=1
+                )
+        else:
+            params_til = opt.minimize(
+                wsumsq,
+                params_init,
+                args=(tx_objs),
+                method="L-BFGS-B",
+                bounds=bnds,
+                tol=1e-15,
+            )
         phi0til, phi1til, phi2til = params_til.x
         wsse = params_til.fun
         obs = df.shape[0]
         params = np.zeros(numparams)
         params[:3] = np.array([phi0til, phi1til, phi2til])
         params_to_plot = params
+        # set initial values to parameter estimates
+        params_init = np.array([phi0til, phi1til, phi2til])
     elif tax_func_type == "HSV":
         # '''
         # Estimate Heathcote, Storesletten, Violante (2017) parameters via
@@ -879,7 +926,7 @@ def txfunc_est(
     # Garbage collection
     del df, txrates
 
-    return params, wsse, obs
+    return params, wsse, obs, params_init
 
 
 def tax_data_sample(
@@ -1055,6 +1102,110 @@ def tax_func_loop(
     NoData_cnt = np.min(min_age - s_min, 0)
 
     # Each age s must be done in serial
+    # Set initial values
+    # TODO: update this, so if using DEP or GS initial parameters are estimated on all ages first
+    if tax_func_type in ["DEP", "DEP_totalinc", "GS"]:
+        s = 0
+        df = data
+        df_etr = df.loc[
+            df[
+                (np.isfinite(df["etr"]))
+                & (np.isfinite(df["total_labinc"]))
+                & (np.isfinite(df["total_capinc"]))
+                & (np.isfinite(df["weight"]))
+            ].index,
+            [
+                "mtr_labinc",
+                "mtr_capinc",
+                "total_labinc",
+                "total_capinc",
+                "etr",
+                "weight",
+            ],
+        ].copy()
+        df_mtrx = df.loc[
+            df[
+                (np.isfinite(df["mtr_labinc"]))
+                & (np.isfinite(df["total_labinc"]))
+                & (np.isfinite(df["total_capinc"]))
+                & (np.isfinite(df["weight"]))
+            ].index,
+            ["mtr_labinc", "total_labinc", "total_capinc", "weight"],
+        ].copy()
+        df_mtry = df.loc[
+            df[
+                (np.isfinite(df["mtr_capinc"]))
+                & (np.isfinite(df["total_labinc"]))
+                & (np.isfinite(df["total_capinc"]))
+                & (np.isfinite(df["weight"]))
+            ].index,
+            ["mtr_capinc", "total_labinc", "total_capinc", "weight"],
+        ].copy()
+        # Estimate effective tax rate function ETR(x,y)
+        (
+            etrparams,
+            etr_wsumsq_arr[s - s_min],
+            etr_obs_arr[s - s_min],
+            params_init_etr
+        ) = txfunc_est(
+            df_etr,
+            s,
+            t,
+            "etr",
+            tax_func_type,
+            numparams,
+            output_dir,
+            False,
+            None,
+            True
+        )
+        etrparam_list[s - s_min] = etrparams
+        del df_etr
+
+        # Estimate marginal tax rate of labor income function
+        # MTRx(x,y)
+        (
+            mtrxparams,
+            mtrx_wsumsq_arr[s - s_min],
+            mtrx_obs_arr[s - s_min],
+            params_init_mtrx
+        ) = txfunc_est(
+            df_mtrx,
+            s,
+            t,
+            "mtrx",
+            tax_func_type,
+            numparams,
+            output_dir,
+            False,
+            None,
+            True
+        )
+        del df_mtrx
+        # Estimate marginal tax rate of capital income function
+        # MTRy(x,y)
+        (
+            mtryparams,
+            mtry_wsumsq_arr[s - s_min],
+            mtry_obs_arr[s - s_min],
+            params_init_mtry
+        ) = txfunc_est(
+            df_mtry,
+            s,
+            t,
+            "mtry",
+            tax_func_type,
+            numparams,
+            output_dir,
+            False,
+            None,
+            True
+        )
+        mtryparam_list[s - s_min] = mtryparams
+    else:
+        params_init_etr = None
+        params_init_mtrx = None
+        params_init_mtry = None
     for s in ages_list:
         if age_specific:
             print("Year=", t, "Age=", s)
@@ -1199,6 +1350,7 @@ def tax_func_loop(
                 etrparams,
                 etr_wsumsq_arr[s - s_min],
                 etr_obs_arr[s - s_min],
+                params_init
             ) = txfunc_est(
                 df_etr,
                 s,
@@ -1208,6 +1360,7 @@ def tax_func_loop(
                 numparams,
                 output_dir,
                 graph_est,
+                params_init_etr
             )
             etrparam_list[s - s_min] = etrparams
             del df_etr
@@ -1218,6 +1371,7 @@ def tax_func_loop(
                 mtrxparams,
                 mtrx_wsumsq_arr[s - s_min],
                 mtrx_obs_arr[s - s_min],
+                params_init
             ) = txfunc_est(
                 df_mtrx,
                 s,
@@ -1227,6 +1381,7 @@ def tax_func_loop(
                 numparams,
                 output_dir,
                 graph_est,
+                params_init_mtrx
             )
             mtrxparam_list[s - s_min] = mtrxparams
             del df_mtrx
@@ -1236,6 +1391,7 @@ def tax_func_loop(
                 mtryparams,
                 mtry_wsumsq_arr[s - s_min],
                 mtry_obs_arr[s - s_min],
+                params_init
             ) = txfunc_est(
                 df_mtry,
                 s,
@@ -1245,6 +1401,7 @@ def tax_func_loop(
                 numparams,
                 output_dir,
                 graph_est,
+                params_init_mtry
             )
             mtryparam_list[s - s_min] = mtryparams
 

From abe58791012cb9bb952497f9a0961ba3d75ae41b Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Mon, 20 May 2024 13:55:46 -0400
Subject: [PATCH 04/12] update bounds for diff evo

---
 ogcore/txfunc.py | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
index 1e7ad0953..54f1888e1 100644
--- a/ogcore/txfunc.py
+++ b/ogcore/txfunc.py
@@ -642,9 +642,6 @@ def txfunc_est(
             max_y_init = np.minimum(
                 txrates[(df["total_labinc"] < x_20pctl)].max(), MAX_ETR + 0.05
             )
-            shift = txrates[
-                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
-            ].min()
             share_init = 0.5
             params_init = np.array(
                 [
@@ -657,6 +654,9 @@ def txfunc_est(
                     share_init,
                 ]
             )
+        shift = txrates[
+                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
+            ].min()
         shift_x = 0.0  # temp value
         shift_y = 0.0  # temp value
         tx_objs = (
@@ -670,11 +670,20 @@ def txfunc_est(
         )
         lb_max_x = np.maximum(min_x, 0.0) + 1e-4
         lb_max_y = np.maximum(min_y, 0.0) + 1e-4
+        # bnds = (
+        #     (1e-12, None),
+        #     (1e-12, None),
+        #     (1e-12, None),
+        #     (1e-12, None),
+        #     (lb_max_x, MAX_ETR + 0.15),
+        #     (lb_max_y, MAX_ETR + 0.15),
+        #     (0, 1),
+        # )
         bnds = (
-            (1e-12, None),
-            (1e-12, None),
-            (1e-12, None),
-            (1e-12, None),
+            (1e-12, 9999),
+            (1e-12, 9999),
+            (1e-12, 9999),
+            (1e-12, 9999),
             (lb_max_x, MAX_ETR + 0.15),
             (lb_max_y, MAX_ETR + 0.15),
             (0, 1),
@@ -738,13 +747,13 @@ def txfunc_est(
                 txrates[(df["total_labinc"] < x_20pctl)].max(), MAX_ETR + 0.05
             )
             max_income_init = max(max_x_init, max_y_init)
-            min_income = min(min_x, min_y)
-            shift = txrates[
-                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
-            ].min()
             share_init = 0.5
-            shift_inc = 0.0  # temp value
             params_init = np.array([Atil_init, Btil_init, max_income_init])
+        shift = txrates[
+                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
+            ].min()
+        min_income = min(min_x, min_y)
+        shift_inc = 0.0  # temp value
         tx_objs = (
             np.array([min_income, shift_inc, shift]),
             X,
@@ -755,7 +764,8 @@ def txfunc_est(
             rate_type,
         )
         lb_max_income = np.maximum(min_income, 0.0) + 1e-4
-        bnds = ((1e-12, None), (1e-12, None), (lb_max_income, MAX_ETR + 0.15))
+        # bnds = ((1e-12, None), (1e-12, None), (lb_max_income, MAX_ETR + 0.15))
+        bnds = ((1e-12, 99999), (1e-12, 9999), (lb_max_income, MAX_ETR + 0.15))
         if global_opt:
             params_til = opt.differential_evolution(
                 wsumsq,

From 339282ae3d669e3eafde5984830c2739f5cd1b48 Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Wed, 29 May 2024 17:12:45 -0400
Subject: [PATCH 05/12] test HSV rate functions

---
 tests/test_txfunc.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/tests/test_txfunc.py b/tests/test_txfunc.py
index 40afe7961..f687d2598 100644
--- a/tests/test_txfunc.py
+++ b/tests/test_txfunc.py
@@ -434,6 +434,7 @@ def test_tax_func_loop():
         assert np.allclose(test_tuple[i], v, atol=1e-06)
 
 
+# DEP parameters
 A = 0.02
 B = 0.01
 C = 0.003
@@ -444,12 +445,17 @@ def test_tax_func_loop():
 min_y = 0.05
 shift = 0.03
 share = 0.7
+shift_x = np.maximum(-min_x, 0.0) + 0.01 * (max_x - min_x)
+shift_y = np.maximum(-min_y, 0.0) + 0.01 * (max_y - min_y)
+# GS parameters
 phi0 = 0.6
 phi1 = 0.5
 phi2 = 0.6
-shift_x = np.maximum(-min_x, 0.0) + 0.01 * (max_x - min_x)
-shift_y = np.maximum(-min_y, 0.0) + 0.01 * (max_y - min_y)
+# Linear parameters
 avg_rate = 0.17
+# HSV parameters
+lambda_s = 0.5
+tau_s = 0.1
 
 
 @pytest.mark.parametrize(
@@ -606,6 +612,24 @@ def test_tax_func_loop():
             False,
             np.array([0.64187414, 0.63823569, 0.27160586, 0.09619512]),
         ),
+        (
+            "HSV",
+            "etr",
+            np.array([lambda_s, tau_s]),
+            True,
+            False,
+            False,
+            np.array([0.670123022, 0.684204102, 0.543778232, 0.455969612]),
+        ),
+        (
+            "HSV",
+            "mtr",
+            np.array([lambda_s, tau_s]),
+            True,
+            False,
+            False,
+            np.array([0.70311072, 0.715783692, 0.589400409, 0.510372651]),
+        ),
     ],
     ids=[
         "DEP for estimation",
@@ -618,6 +642,8 @@ def test_tax_func_loop():
         "DEP, analytical MTRs",
         "DEP analytical capital MTRs",
         "DEP_totalinc, analytical MTRs",
+        "HSV, etr",
+        "HSV, mtr",
     ],
 )
 def test_get_tax_rates(

From 7bab83ae181b6a106773737ec387b2d046fac795 Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Wed, 29 May 2024 17:12:56 -0400
Subject: [PATCH 06/12] format

---
 ogcore/txfunc.py | 85 ++++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
index 54f1888e1..7a80d874d 100644
--- a/ogcore/txfunc.py
+++ b/ogcore/txfunc.py
@@ -568,8 +568,16 @@ def replace_outliers(param_list, sse_big_mat):
 
 
 def txfunc_est(
-    df, s, t, rate_type, tax_func_type, numparams, output_dir, graph,
-    params_init=None, global_opt=False
+    df,
+    s,
+    t,
+    rate_type,
+    tax_func_type,
+    numparams,
+    output_dir,
+    graph,
+    params_init=None,
+    global_opt=False,
 ):
     """
     This function uses tax tax rate and income data for individuals of a
@@ -655,8 +663,8 @@ def txfunc_est(
                 ]
             )
         shift = txrates[
-                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
-            ].min()
+            (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
+        ].min()
         shift_x = 0.0  # temp value
         shift_y = 0.0  # temp value
         tx_objs = (
@@ -690,11 +698,8 @@ def txfunc_est(
         )
         if global_opt:
             params_til = opt.differential_evolution(
-                wsumsq,
-                bounds=bnds,
-                args=(tx_objs),
-                seed=1
-                )
+                wsumsq, bounds=bnds, args=(tx_objs), seed=1
+            )
         else:
             params_til = opt.minimize(
                 wsumsq,
@@ -722,16 +727,16 @@ def txfunc_est(
         params_to_plot = params
         # set initial values to parameter estimates
         params_init = np.array(
-                [
-                    Atil,
-                    Btil,
-                    Ctil,
-                    Dtil,
-                    max_x,
-                    max_y,
-                    share,
-                ]
-            )
+            [
+                Atil,
+                Btil,
+                Ctil,
+                Dtil,
+                max_x,
+                max_y,
+                share,
+            ]
+        )
     elif tax_func_type == "DEP_totalinc":
         # '''
         # Estimate DeBacker, Evans, Phillips (2018) ratio of polynomial
@@ -750,8 +755,8 @@ def txfunc_est(
             share_init = 0.5
             params_init = np.array([Atil_init, Btil_init, max_income_init])
         shift = txrates[
-                (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
-            ].min()
+            (df["total_labinc"] < x_20pctl) | (df["total_capinc"] < y_20pctl)
+        ].min()
         min_income = min(min_x, min_y)
         shift_inc = 0.0  # temp value
         tx_objs = (
@@ -768,11 +773,8 @@ def txfunc_est(
         bnds = ((1e-12, 99999), (1e-12, 9999), (lb_max_income, MAX_ETR + 0.15))
         if global_opt:
             params_til = opt.differential_evolution(
-                wsumsq,
-                bounds=bnds,
-                args=(tx_objs),
-                seed=1
-                )
+                wsumsq, bounds=bnds, args=(tx_objs), seed=1
+            )
         else:
             params_til = opt.minimize(
                 wsumsq,
@@ -818,11 +820,8 @@ def txfunc_est(
         bnds = ((1e-12, 9999), (1e-12, 9999), (1e-12, 9999))
         if global_opt:
             params_til = opt.differential_evolution(
-                wsumsq,
-                bounds=bnds,
-                args=(tx_objs),
-                seed=1
-                )
+                wsumsq, bounds=bnds, args=(tx_objs), seed=1
+            )
         else:
             params_til = opt.minimize(
                 wsumsq,
@@ -1156,7 +1155,7 @@ def tax_func_loop(
             etrparams,
             etr_wsumsq_arr[s - s_min],
             etr_obs_arr[s - s_min],
-            params_init_etr
+            params_init_etr,
         ) = txfunc_est(
             df_etr,
             s,
@@ -1167,7 +1166,7 @@ def tax_func_loop(
             output_dir,
             False,
             None,
-            True
+            True,
         )
         etrparam_list[s - s_min] = etrparams
         del df_etr
@@ -1178,7 +1177,7 @@ def tax_func_loop(
             mtrxparams,
             mtrx_wsumsq_arr[s - s_min],
             mtrx_obs_arr[s - s_min],
-            params_init_mtrx
+            params_init_mtrx,
         ) = txfunc_est(
             df_mtrx,
             s,
@@ -1189,7 +1188,7 @@ def tax_func_loop(
             output_dir,
             False,
             None,
-            True
+            True,
         )
         del df_mtrx
         # Estimate marginal tax rate of capital income function
@@ -1198,7 +1197,7 @@ def tax_func_loop(
             mtryparams,
             mtry_wsumsq_arr[s - s_min],
             mtry_obs_arr[s - s_min],
-            params_init_mtry
+            params_init_mtry,
         ) = txfunc_est(
             df_mtry,
             s,
@@ -1209,7 +1208,7 @@ def tax_func_loop(
             output_dir,
             False,
             None,
-            True
+            True,
         )
         mtryparam_list[s - s_min] = mtryparams
     else:
@@ -1360,7 +1359,7 @@ def tax_func_loop(
                 etrparams,
                 etr_wsumsq_arr[s - s_min],
                 etr_obs_arr[s - s_min],
-                params_init
+                params_init,
             ) = txfunc_est(
                 df_etr,
                 s,
@@ -1370,7 +1369,7 @@ def tax_func_loop(
                 numparams,
                 output_dir,
                 graph_est,
-                params_init_etr
+                params_init_etr,
             )
             etrparam_list[s - s_min] = etrparams
             del df_etr
@@ -1381,7 +1380,7 @@ def tax_func_loop(
                 mtrxparams,
                 mtrx_wsumsq_arr[s - s_min],
                 mtrx_obs_arr[s - s_min],
-                params_init
+                params_init,
             ) = txfunc_est(
                 df_mtrx,
                 s,
@@ -1391,7 +1390,7 @@ def tax_func_loop(
                 numparams,
                 output_dir,
                 graph_est,
-                params_init_mtrx
+                params_init_mtrx,
             )
             mtrxparam_list[s - s_min] = mtrxparams
             del df_mtrx
@@ -1401,7 +1400,7 @@ def tax_func_loop(
                 mtryparams,
                 mtry_wsumsq_arr[s - s_min],
                 mtry_obs_arr[s - s_min],
-                params_init
+                params_init,
             ) = txfunc_est(
                 df_mtry,
                 s,
@@ -1411,7 +1410,7 @@ def tax_func_loop(
                 numparams,
                 output_dir,
                 graph_est,
-                params_init_mtry
+                params_init_mtry,
             )
             mtryparam_list[s - s_min] = mtryparams
 

From 49647b0ee3e2c3af14a4186f2acbc01b54997d1e Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Wed, 29 May 2024 17:50:13 -0400
Subject: [PATCH 07/12] rename X and Y so unique

---
 ogcore/txfunc.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
index 7a80d874d..809d0183d 100644
--- a/ogcore/txfunc.py
+++ b/ogcore/txfunc.py
@@ -846,9 +846,9 @@ def txfunc_est(
         # '''
         constant = np.ones_like(income)
         ln_income = np.log(income)
-        X = np.column_stack((constant, ln_income))
-        Y = np.log(1 - txrates)
-        param_est = np.linalg.inv(X.T @ X) @ X.T @ Y
+        X_mat = np.column_stack((constant, ln_income))
+        Y_vec = np.log(1 - txrates)
+        param_est = np.linalg.inv(X_mat.T @ X_mat) @ X_mat.T @ Y_vec
         params = np.zeros(numparams)
         if rate_type == "etr":
             ln_lambda_s_hat, minus_tau_s_hat = param_est
@@ -858,8 +858,8 @@ def txfunc_est(
             lambda_s_hat = np.exp(constant - np.log(1 + minus_tau_s_hat))
             params[:2] = np.array([lambda_s_hat, -minus_tau_s_hat])
         # Calculate the WSSE
-        Y_hat = X @ params
-        wsse = ((Y - Y_hat) ** 2 * wgts).sum()
+        Y_hat = X_mat @ params
+        wsse = ((Y_vec - Y_hat) ** 2 * wgts).sum()
         obs = df.shape[0]
         params_to_plot = params
     elif tax_func_type == "linear":

From 84036b1e90d515c311670322c2404ac994e5f9e1 Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Wed, 29 May 2024 17:50:26 -0400
Subject: [PATCH 08/12] test HSV estimation

---
 tests/test_txfunc.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tests/test_txfunc.py b/tests/test_txfunc.py
index f687d2598..c59fe0639 100644
--- a/tests/test_txfunc.py
+++ b/tests/test_txfunc.py
@@ -204,6 +204,21 @@ def test_replace_outliers():
 )
 expected_tuple_linear_mtrx = (0.37030104, 0.0, 152900)
 expected_tuple_linear_mtry = (0.24793767, 0.0, 152900)
+expected_tuple_HSV = (
+    np.array([1.68456651, 0.06149992]),
+    792935850.6195159,
+    152900,
+)
+expected_tuple_HSV_mtrx = (
+    np.array([1.35287201, 0.05298318]),
+    642774637.8247124,
+    152900,
+)
+expected_tuple_HSV_mtry = (
+    np.array([1.76056732, 0.05796658]),
+    798149828.3166732,
+    152900,
+)
 
 
 @pytest.mark.local  # only marking as local because platform
@@ -259,8 +274,18 @@ def test_txfunc_est(
         ("etr", "linear", 1, expected_tuple_linear),
         ("mtrx", "linear", 1, expected_tuple_linear_mtrx),
         ("mtry", "linear", 1, expected_tuple_linear_mtry),
+        ("etr", "HSV", 2, expected_tuple_HSV),
+        ("mtrx", "HSV", 2, expected_tuple_HSV_mtrx),
+        ("mtry", "HSV", 2, expected_tuple_HSV_mtry),
+    ],
+    ids=[
+        "linear, etr",
+        "linear, mtrx",
+        "linear, mtry",
+        "HSV, etr",
+        "HSV, mtrx",
+        "HSV, mtry",
     ],
-    ids=["linear", "linear, mtrx", "linear, mtry"],
 )
 def test_txfunc_est_on_GH(
     rate_type, tax_func_type, numparams, expected_tuple, tmpdir
@@ -294,6 +319,7 @@ def test_txfunc_est_on_GH(
     )
 
     for i, v in enumerate(expected_tuple):
+        print("For element", i, ", test tuple =", test_tuple[i])
         assert np.allclose(test_tuple[i], v, rtol=0.0, atol=1e-04)
 
 

From 2d693b9a3b6c031d81dd9ca966f909b7533dcea6 Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Fri, 31 May 2024 09:20:02 -0400
Subject: [PATCH 09/12] use sse for hsv

---
 ogcore/txfunc.py     | 3 ++-
 tests/test_txfunc.py | 9 ++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py
index 809d0183d..82dda2739 100644
--- a/ogcore/txfunc.py
+++ b/ogcore/txfunc.py
@@ -859,7 +859,8 @@ def txfunc_est(
             params[:2] = np.array([lambda_s_hat, -minus_tau_s_hat])
         # Calculate the WSSE
         Y_hat = X_mat @ params
-        wsse = ((Y_vec - Y_hat) ** 2 * wgts).sum()
+        # wsse = ((Y_vec - Y_hat) ** 2 * wgts).sum()
+        wsse = ((Y_vec - Y_hat) ** 2).sum()
         obs = df.shape[0]
         params_to_plot = params
     elif tax_func_type == "linear":
diff --git a/tests/test_txfunc.py b/tests/test_txfunc.py
index c59fe0639..6e86dd21e 100644
--- a/tests/test_txfunc.py
+++ b/tests/test_txfunc.py
@@ -206,17 +206,20 @@ def test_replace_outliers():
 expected_tuple_linear_mtry = (0.24793767, 0.0, 152900)
 expected_tuple_HSV = (
     np.array([1.68456651, 0.06149992]),
-    792935850.6195159,
+    # 792935850.6195159,
+    1116189.8044088066,
     152900,
 )
 expected_tuple_HSV_mtrx = (
     np.array([1.35287201, 0.05298318]),
-    642774637.8247124,
+    # 642774637.8247124,
+    904161.7256125457,
     152900,
 )
 expected_tuple_HSV_mtry = (
     np.array([1.76056732, 0.05796658]),
-    798149828.3166732,
+    # 798149828.3166732,
+    1119562.1877437222,
     152900,
 )
 

From 88e8fde7f80d964e4b6068a3d1dfb0e426c6a6d0 Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Thu, 6 Jun 2024 13:29:59 -0400
Subject: [PATCH 10/12] add HSV functions and reference

---
 docs/book/OGCore_references.bib        | 15 +++++++++++++++
 docs/book/content/theory/government.md | 12 ++++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/docs/book/OGCore_references.bib b/docs/book/OGCore_references.bib
index f98ab96ea..275e93522 100755
--- a/docs/book/OGCore_references.bib
+++ b/docs/book/OGCore_references.bib
@@ -215,6 +215,21 @@ @Article{GouveiaStrauss:1994
   month={June},
 }
 
+@Article{HeathcoteStoreslettenViolante:2017,
+  author={Jonathan Heathcote and Kjetil Storesletten and Giovanni L. Violante},
+  title={{Optimal Tax Progressivity: An Analytical Framework}},
+  journal={The Quarterly Journal of Economics},
+  year=2017,
+  volume={132},
+  number={4},
+  pages={1693-1754},
+  month={},
+  keywords={},
+  doi={},
+  abstract={What shapes the optimal degree of progressivity of the tax and transfer system? On the one hand, a progressive tax system can counteract inequality in initial conditions and substitute for imperfect private insurance against idiosyncratic earnings risk. On the other hand, progressivity reduces incentives to work and to invest in skills, distortions that are especially costly when the government must finance public goods. We develop a tractable equilibrium model that features all of these trade-offs. The analytical expressions we derive for social welfare deliver a transparent understanding of how preference, technology, and market structure parameters influence the optimal degree of progressivity. A calibration for the U.S. economy indicates that endogenous skill investment, flexible labor supply, and the desire to finance government purchases play quantitatively similar roles in limiting optimal progressivity. In a version of the model where poverty constrains skill investment, optimal progressivity is close to the U.S. value. An empirical analysis on cross-country data offers support to the theory.},
+  url={https://ideas.repec.org/a/oup/qjecon/v132y2017i4p1693-1754..html}
+}
+
 @TECHREPORT{MoorePecoraro:2021,
   AUTHOR = {Rachel Moore and Brandon Pecoraro},
   TITLE = {Quantitative Analysis of a Wealth Tax in the United States: Exclusions, Evasion, and Expenditures},
diff --git a/docs/book/content/theory/government.md b/docs/book/content/theory/government.md
index 0b88fcd96..f3e8d3767 100644
--- a/docs/book/content/theory/government.md
+++ b/docs/book/content/theory/government.md
@@ -251,9 +251,17 @@ The second difficulty in modeling realistic tax and incentive detail is the need
 
      Users can select this option by setting the parameter `tax_func_type="GS"`.  The three parameters of this function ($\phi_{0}, \phi_{1}, \phi_{2}$) are estimated using the weighted sum of squares estimated described in Equation {eq}`EqTaxCalcThetaWSSQ`.
 
-  3. Linear tax functions (i.e., $\tau =$ a constant).  Users can select this option by setting the parameter `tax_func_type="linear"`.  The constant rate is found by taking the weighted average of the appropriate tax rate (effective tax rate, marginal tax rate on labor income, marginal tax rate on labor income) for each age and year, where the values are weighted by sampling weights and income.
+  3. Tax functions from {cite}`HeathcoteStoreslettenViolante:2017`:
 
-  4. Monotonically increasing smoothing spline `tax_func_type="mono"`. This functional approach is based on {cite}`EilersMarx:1996`, {cite}`Eilers:2003` and {cite}`Eilers:2005`, which is a least squares smoothing spline that penalizes nonmonotonicities. We used k-fold cross validation to show that the optimal smoothing parameter for this function on our tax data is $\lambda=12$, which is set as the default. These tax functions are fit to total income data (labor income plus capital income).
+     ```{math}
+       \tau = 1 - \phi_{0}(x+y)^{1-phi_1}
+     ```
+
+     Users can select this option by setting the parameter `tax_func_type="HSV"`.  The two parameters of this function ($\phi_{0}, \phi_{1}$) are estimated using ordinary least squares.
+
+  4. Linear tax functions (i.e., $\tau =$ a constant).  Users can select this option by setting the parameter `tax_func_type="linear"`.  The constant rate is found by taking the weighted average of the appropriate tax rate (effective tax rate, marginal tax rate on labor income, marginal tax rate on labor income) for each age and year, where the values are weighted by sampling weights and income.
+
+  5. Monotonically increasing smoothing spline `tax_func_type="mono"`. This functional approach is based on {cite}`EilersMarx:1996`, {cite}`Eilers:2003` and {cite}`Eilers:2005`, which is a least squares smoothing spline that penalizes nonmonotonicities. We used k-fold cross validation to show that the optimal smoothing parameter for this function on our tax data is $\lambda=12$, which is set as the default. These tax functions are fit to total income data (labor income plus capital income).
 
   Among all of these tax functional forms, users can set the `age_specific` parameter to `False` if they wish to have one function for all ages $s$.  In addition, for the functions based on {cite}`DeBackerEtAl:2019` (`tax_func_type="DEP"` or `tax_func_type="DEP_totinc"`), one can set `analytical_mtrs=True` if they wish to have the $\tau^{mtrx}_{s,t}$ and $\tau^{mtry}_{s,t}$ derived from the $\tau^{etr}_{s,t}$ functions.  This provides theoretical consistency, but reduced fit of the functions (see {cite}`DeBackerEtAl:2019` for more details).
 

From 75f269ff40da8913efc2d36ac9d4bb02f95a39cd Mon Sep 17 00:00:00 2001
From: jdebacker <jason.debacker@gmail.com>
Date: Fri, 7 Jun 2024 16:57:07 -0400
Subject: [PATCH 11/12] bump version to 0.11.7

---
 CHANGELOG.md       | 6 ++++++
 ogcore/__init__.py | 2 +-
 setup.py           | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 746ad548a..503c9c49d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## [0.11.7] - 2024-06-07 01:00:00
+
+### Added
+
+- Heathcote, Storesletten, and Violante (2017) tax functions to `txfunc.py`
+
 ## [0.11.6] - 2024-04-19 01:00:00
 
 ### Added
diff --git a/ogcore/__init__.py b/ogcore/__init__.py
index 48ac6749f..06baceb87 100644
--- a/ogcore/__init__.py
+++ b/ogcore/__init__.py
@@ -20,4 +20,4 @@
 from ogcore.txfunc import *
 from ogcore.utils import *
 
-__version__ = "0.11.6"
+__version__ = "0.11.7"
diff --git a/setup.py b/setup.py
index 387207319..035159fe2 100755
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="ogcore",
-    version="0.11.6",
+    version="0.11.7",
     author="Jason DeBacker and Richard W. Evans",
     license="CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
     description="A general equilibribum overlapping generations model for fiscal policy analysis",

From 3da5b4a7dc690b54e10fcd3c464c44ddf89cba25 Mon Sep 17 00:00:00 2001
From: Richard Evans <rickecon@gmail.com>
Date: Fri, 7 Jun 2024 15:53:19 -0600
Subject: [PATCH 12/12] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 503c9c49d..5dfde10e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -233,6 +233,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Any earlier versions of OG-USA can be found in the [`OG-Core`](https://github.com/PSLmodels/OG-Core) repository [release history](https://github.com/PSLmodels/OG-Core/releases) from [v.0.6.4](https://github.com/PSLmodels/OG-Core/releases/tag/v0.6.4) (Jul. 20, 2021) or earlier.
 
 
+[0.11.7]: https://github.com/PSLmodels/OG-Core/compare/v0.11.6...v0.11.7
 [0.11.6]: https://github.com/PSLmodels/OG-Core/compare/v0.11.5...v0.11.6
 [0.11.5]: https://github.com/PSLmodels/OG-Core/compare/v0.11.4...v0.11.5
 [0.11.4]: https://github.com/PSLmodels/OG-Core/compare/v0.11.3...v0.11.4