econ-ark · sbenthall · Jan 14, 2021 · Jan 11, 2021 · Jan 11, 2021 · Jan 12, 2021
diff --git a/Documentation/CHANGELOG.md b/Documentation/CHANGELOG.md
@@ -14,14 +14,15 @@ Release Data: TBD
 
 #### Major Changes
 
-* Adds a constructor for LogNormal distributions from mean and standard deviation (#891)[https://github.com/econ-ark/HARK/pull/891/]
-* Uses new LogNormal constructor in ConsPortfolioModel (#891)[https://github.com/econ-ark/HARK/pull/891/]
-* calcExpectations method for taking the expectation of a distribution over a function (#884)[https://github.com/econ-ark/HARK/pull/884/] (#897)[https://github.com/econ-ark/HARK/pull/897/]
+* Adds a constructor for LogNormal distributions from mean and standard deviation [#891](https://github.com/econ-ark/HARK/pull/891/)
+* Uses new LogNormal constructor in ConsPortfolioModel [#891](https://github.com/econ-ark/HARK/pull/891/)
+* calcExpectations method for taking the expectation of a distribution over a function [#884](https://github.com/econ-ark/HARK/pull/884/) [#897](https://github.com/econ-ark/HARK/pull/897/)
 * Centralizes the definition of value, marginal value, and marginal marginal value functions that use inverse-space
 interpolation for problems with CRRA utility. See [#888](https://github.com/econ-ark/HARK/pull/888).
-* MarkovProcess class (#902)[https://github.com/econ-ark/HARK/pull/902]
+* MarkovProcess class [#902](https://github.com/econ-ark/HARK/pull/902)
+* Adds SSA life tables and methods to extract survival probabilities from them [#906](https://github.com/econ-ark/HARK/pull/906).
 * Fix the return fields of `dcegm/calcCrossPoints` [#909](https://github.com/econ-ark/HARK/pull/909).
-* Corrects location of constructor documentation to class string for Sphinx rendering (#908)[https://github.com/econ-ark/HARK/pull/908]
+* Corrects location of constructor documentation to class string for Sphinx rendering [#908](https://github.com/econ-ark/HARK/pull/908)
 
 #### Minor Changes
 

diff --git a/HARK/datasets/life_tables/__init__.py b/HARK/datasets/life_tables/__init__.py
diff --git a/HARK/datasets/life_tables/us_ssa/PerLifeTables_F_Alt2_TR2020.csv b/HARK/datasets/life_tables/us_ssa/PerLifeTables_F_Alt2_TR2020.csv
diff --git a/HARK/datasets/life_tables/us_ssa/PerLifeTables_F_Hist_TR2020.csv b/HARK/datasets/life_tables/us_ssa/PerLifeTables_F_Hist_TR2020.csv
diff --git a/HARK/datasets/life_tables/us_ssa/PerLifeTables_M_Alt2_TR2020.csv b/HARK/datasets/life_tables/us_ssa/PerLifeTables_M_Alt2_TR2020.csv
diff --git a/HARK/datasets/life_tables/us_ssa/PerLifeTables_M_Hist_TR2020.csv b/HARK/datasets/life_tables/us_ssa/PerLifeTables_M_Hist_TR2020.csv
diff --git a/HARK/datasets/life_tables/us_ssa/README.md b/HARK/datasets/life_tables/us_ssa/README.md
@@ -0,0 +1,40 @@
+# United States Social Security Administration Life-Tables
+
+This folder contains tools for producing sequences of life-cycle survival probabilities for microeconomic
+models from official U.S. mortality estimates.
+
+## Sources
+
+The life tables contained in this folder correspond to the downloadable
+"Period Life Tables" from the 2020 Annual Trustees Report of the SSA.
+
+They were downloaded from [this link](https://www.ssa.gov/oact/HistEst/PerLifeTables/2020/PerLifeTables2020.html)
+on January 7, 2021.
+
+## Format
+
+There are four `.csv` files:
+- `PerLifeTables_F_Hist_TR2020.csv` contains historical (1900-2017) information for females.
+- `PerLifeTables_F_Alt2_TR2020.csv` contains projected (2018-2095) information for females.
+- `PerLifeTables_M_Hist_TR2020.csv` contains historical (1900-2017) information for males.
+- `PerLifeTables_M_Alt2_TR2020.csv` contains projected (2018-2095) information for males.
+
+All the tables have the same format. There are three columns of interest for our purposes:
+- `Year`.
+- `x`: age.
+- `q(x)`: probability of death.
+
+As an example, the probability that a male who was 27 years old in 1990 would die within a year (from 1990 to 1991) is found in file `PerLifeTables_M_Hist_TR2020.csv`, in column `q(x)` and the row in which `Year == 1990` and `x == 27`.
+
+Visit the [SSA's site](https://www.ssa.gov/oact/HistEst/PerLifeTables/2020/PerLifeTables2020.html) for a complete description of the tables.
+
+## Usage
+
+`SSATools.py` contains functions that translate the information in the `.csv`
+life-tables into sequences of survival probabilities in the format that HARK's
+life-cycle models require.
+
+The main function is `parse_ssa_life_table`, which produces survival
+probabilities for a given sex, age-range, and year of birth. See the function's
+documentation for details. `examples/Calibration/US_SSA_life_tables.py` contains
+examples of its use.
diff --git a/HARK/datasets/life_tables/us_ssa/SSATools.py b/HARK/datasets/life_tables/us_ssa/SSATools.py
@@ -0,0 +1,193 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jan  8 15:36:14 2021
+
+@author: Mateo
+"""
+
+import numpy as np
+import pandas as pd
+from warnings import warn
+import os
+
+ssa_tables_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+def get_ssa_life_tables():
+    """
+    Reads all the SSA life tables and combines them, adding columns indicating
+    where each row came from (male or female, historical or projected).
+
+    Returns
+    -------
+    Pandas DataFrame
+        A DataFrame containing the information in SSA life-tables for both
+        sexes and all the available years. It returns all the columns in the
+        original tables.
+
+    """
+    # Read the four tables and add columns identifying them
+    dsets = []
+    for sex in ["M", "F"]:
+        for method in ["Historical", "Projected"]:
+
+            # Construct file name
+            infix = "Hist" if method == "Historical" else "Alt2"
+            filename = os.path.join(
+                ssa_tables_dir, "PerLifeTables_" + sex + "_" + infix + "_TR2020.csv"
+            )
+
+            # Read csv
+            table = pd.read_csv(filename, sep=",", skiprows=4)
+
+            # Add identifying info
+            table["Sex"] = sex
+            table["Method"] = method
+
+            dsets.append(table)
+
+    # Concatenate tables by row and return them
+    return pd.concat(dsets)
+
+
+def parse_ssa_life_table(
+    min_age, max_age, female=True, cohort=None, cross_sec=False, year=None
+):
+    """
+    Reads (year,age)-specifc death probabilities form SSA life tables and
+    transforms them to a list of survival probabilities in the format that
+    HARK expects.
+
+    Two methods are supported:
+        - Cross-sectional: finds the 1-year survival probabilities for
+          individuals in the age range for a fixed year.
+          In the output,
+          SurvPrb(age) = 1 - DeathPrb(age, year)
+
+        - Longitudinal: finds the 1-year survival probabilities for individuals
+          of a fixed cohort at different ages (and years). 
+          In the output,
+          SurvPrb(age) = 1 - DeathPrb(age, cohort + age)
+
+    Parameters
+    ----------
+    min_age : int
+        Minimum age for survival probabilities.
+    max_age : int
+        Maximum age for survival probabilities.
+    female : bool, optional
+        Boolean indicating wether to use female or male survival probabilities.
+        The default is True (female).
+    cohort : int, optional
+        If longitudinal probabilities are requested, this is the birth year of
+        the cohort that will be tracked. The default is None.
+    cross_sec : bool, optional
+        Boolean indicating whether the cross-sectional method should be used.
+        The default is False (using the longitudinal method).
+    year : int, optional
+        If cross-sectional probabilities are requestedm this is the year at
+        which they will be taken. The default is None.
+
+    Returns
+    -------
+    LivPrb : [float]
+        List of 1-year survival probabilities.
+        LivPrb[n] corresponds to the probability that an indivivual of age
+        'min_age' + n survives one year, in the year 'year' if the
+        cross-sectional method is used or 'cohort' + ('min_age' + n) if the
+        longitudinal method is used.
+
+    """
+
+    # Infix for file name depending on sex
+    abb = "F" if female else "M"
+
+    # Find year - age combinations that we need
+    assert max_age >= min_age, "The maximum age can not be lower than the minimum age."
+    ages = np.arange(min_age, max_age + 1)
+
+    if cross_sec:
+
+        if year is None:
+            raise (
+                TypeError(
+                    "You must provide a year when using "
+                    + "cross-sectional survival probabilities."
+                )
+            )
+
+        years = np.repeat(year, ages.shape)
+
+    else:
+
+        if cohort is None:
+            raise (
+                TypeError(
+                    "You must provide a cohort (birth year) when "
+                    + "using longitudinal survival probabilities."
+                )
+            )
+
+        years = cohort + ages
+
+    # Create filenames
+
+    # Historical and forecasted
+    file_hist = os.path.join(
+        ssa_tables_dir, "PerLifeTables_" + abb + "_Hist_TR2020.csv"
+    )
+    file_fore = os.path.join(
+        ssa_tables_dir, "PerLifeTables_" + abb + "_Alt2_TR2020.csv"
+    )
+
+    # Read them
+    hist_tab = pd.read_csv(
+        file_hist,
+        sep=",",
+        skiprows=4,
+        usecols=["Year", "x", "q(x)"],
+        index_col=["Year", "x"],
+    )
+    fore_tab = pd.read_csv(
+        file_fore,
+        sep=",",
+        skiprows=4,
+        usecols=["Year", "x", "q(x)"],
+        index_col=["Year", "x"],
+    )
+
+    # Find the point at which projections start
+    max_hist = max(hist_tab.index.get_level_values("Year"))
+
+    # Warn the user if projections are used.
+    if max(years) > max_hist:
+        message = "Survival probabilities beyond {} are projections.".format(max_hist)
+        warn(message)
+
+    # Concatenate them
+    tab = pd.concat([hist_tab, fore_tab])
+
+    # Subset and sort deathrates.
+
+    message = (
+        "Parsed life tables do not contain all the requested "
+        + "age-year combinations."
+    )
+    try:
+
+        DeathPrb = tab.loc[zip(years, ages)].sort_values(by="x")
+
+    except KeyError as e:
+
+        raise Exception(message).with_traceback(e.__traceback__)
+
+    # Transform to numpy survival probabilities
+    LivPrb = 1 - DeathPrb["q(x)"].to_numpy()
+
+    # Make sure we got all the probabilities
+    assert len(LivPrb) == max_age - min_age + 1, message
+
+    # Transform from array to list
+    LivPrb = list(LivPrb)
+
+    return LivPrb
diff --git a/HARK/datasets/life_tables/us_ssa/__init__.py b/HARK/datasets/life_tables/us_ssa/__init__.py
diff --git a/examples/Calibration/US_SSA_life_tables.py b/examples/Calibration/US_SSA_life_tables.py
@@ -0,0 +1,56 @@
+from HARK.datasets.life_tables.us_ssa.SSATools import (
+    parse_ssa_life_table,
+    get_ssa_life_tables
+)
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+# %% Inspect lifetables
+
+tables = get_ssa_life_tables()
+print(tables.head)
+
+# %% Survival probabilities from the SSA
+
+# We will find 1-year survival probabilities from ages 21 to 100
+min_age = 21
+max_age = 100
+ages = np.arange(min_age, max_age + 1)
+
+# In the years 1900 and 1950
+years = [1900, 1950]
+
+# %%
+
+# First, the "longitudinal method", which gives us the probabilities
+# experienced by agents born in "year" throughout their lived
+plt.figure()
+for cohort in years:
+    for s in ['male', 'female']:
+
+        fem = s == 'female'
+        LivPrb = parse_ssa_life_table(female = fem, cohort = cohort,
+                                      min_age = min_age, max_age = max_age)
+
+        plt.plot(ages, LivPrb, label = s + ' born in ' + str(cohort))
+
+plt.legend()
+plt.title('Longitudinal survival probabilities')
+
+# %%
+
+# Second, the "cross-sectional method", which gives us the probabilities of
+# survivals of individuals of differnet ages that are alive in the given year.
+plt.figure()
+for year in years:
+    for s in ['male', 'female']:
+
+        fem = s == 'female'
+        LivPrb = parse_ssa_life_table(female = fem, year = year, cross_sec= True,
+                                      min_age = min_age, max_age = max_age)
+
+        plt.plot(ages, LivPrb, label = s + 's in ' + str(year))
+
+plt.legend()
+plt.title('Cross-sectional survival probabilities')