diff --git a/docs/book/content/theory/images/SS_images.py b/docs/book/content/theory/images/SS_images.py index 75f94b5ab..d65c0e816 100644 --- a/docs/book/content/theory/images/SS_images.py +++ b/docs/book/content/theory/images/SS_images.py @@ -4,6 +4,7 @@ documentation ------------------------------------------------------------------------ """ + # Import libraries, packages, and modules import pickle import os diff --git a/ogcore/SS.py b/ogcore/SS.py index e4183a09f..6753225c7 100644 --- a/ogcore/SS.py +++ b/ogcore/SS.py @@ -481,9 +481,6 @@ def inner_loop(outer_loop_vars, p, client): debt_service, p, ) - print("Agg tax = ", total_tax_revenue) - print("Agg pension outlays = ", agg_pension_outlays) - print("Agg UBI outlays = ", UBI_outlays) new_TR = fiscal.get_TR( Y, TR, diff --git a/ogcore/__init__.py b/ogcore/__init__.py index 61f79adca..fee4d8cbb 100644 --- a/ogcore/__init__.py +++ b/ogcore/__init__.py @@ -1,6 +1,7 @@ """ Specify what is available to import from the ogcore package. """ + from ogcore.SS import * from ogcore.TPI import * from ogcore.aggregates import * diff --git a/ogcore/demographics.py b/ogcore/demographics.py new file mode 100644 index 000000000..d8b251217 --- /dev/null +++ b/ogcore/demographics.py @@ -0,0 +1,983 @@ +""" +------------------------------------------------------------------------ +Functions for generating demographic objects necessary for the OG-USA +model +------------------------------------------------------------------------ +""" + +# Import packages +import os +import numpy as np +import scipy.optimize as opt +import pandas as pd +import matplotlib.pyplot as plt +from ogcore.utils import get_legacy_session +from ogcore import parameter_plots as pp + +START_YEAR = 2023 +END_YEAR = 2023 +UN_COUNTRY_CODE = "840" # UN code for USA +# create output director for figures +CUR_PATH = os.path.split(os.path.abspath(__file__))[0] +OUTPUT_DIR = os.path.join(CUR_PATH, "..", "data", "OUTPUT", "Demographics") +if os.access(OUTPUT_DIR, os.F_OK) is False: + os.makedirs(OUTPUT_DIR) + + +""" +------------------------------------------------------------------------ +Define functions +------------------------------------------------------------------------ +""" + + +def get_un_data( + variable_code, + country_id=UN_COUNTRY_CODE, + start_year=START_YEAR, + end_year=END_YEAR, +): + """ + This function retrieves data from the United Nations Data Portal API + for UN population data (see + https://population.un.org/dataportal/about/dataapi) + + Args: + variable_code (str): variable code for UN data + country_id (str): country id for UN data + start_year (int): start year for UN data + end_year (int): end year for UN data + + Returns: + df (Pandas DataFrame): DataFrame of UN data + """ + target = ( + "https://population.un.org/dataportalapi/api/v1/data/indicators/" + + variable_code + + "/locations/" + + country_id + + "/start/" + + str(start_year) + + "/end/" + + str(end_year) + ) + + # get data from url + response = get_legacy_session().get(target) + # Check if the request was successful before processing + if response.status_code == 200: + # Converts call into JSON + j = response.json() + # Convert JSON into a pandas DataFrame. + # pd.json_normalize flattens the JSON to accommodate nested lists + # within the JSON structure + df = pd.json_normalize(j["data"]) + # Loop until there are new pages with data + while j["nextPage"] is not None: + # Reset the target to the next page + target = j["nextPage"] + # call the API for the next page + response = get_legacy_session().get(target) + # Convert response to JSON format + j = response.json() + # Store the next page in a data frame + df_temp = pd.json_normalize(j["data"]) + # Append next page to the data frame + df = pd.concat([df, df_temp]) + # keep just what is needed from data + df = df[df.variant == "Median"] + df = df[df.sex == "Both sexes"][["timeLabel", "ageLabel", "value"]] + df.rename( + {"timeLabel": "year", "ageLabel": "age"}, axis=1, inplace=True + ) + df.loc[df.age == "100+", "age"] = 100 + df.age = df.age.astype(int) + df.year = df.year.astype(int) + df = df[df.age < 100] # need to drop 100+ age category + else: + print( + f"Failed to retrieve population data. HTTP status code: {response.status_code}" + ) + assert False + + return df + + +def get_fert( + totpers=100, + min_age=0, + max_age=99, + country_id=UN_COUNTRY_CODE, + start_year=START_YEAR, + end_year=END_YEAR, + graph=False, + plot_path=None, +): + """ + This function generates a vector of fertility rates by model period + age that corresponds to the fertility rate data by age in years. + + Args: + totpers (int): total number of agent life periods (E+S), >= 3 + min_age (int): age in years at which agents are born, >= 0 + max_age (int): age in years at which agents die with certainty, + >= 4, < 100 (max age in UN data is 99, 100+ i same group) + country_id (str): country id for UN data + start_year (int): start year for UN data + end_year (int): end year for UN data + graph (bool): =True if want graphical output + plot_path (str): path to save fertility rate plot + + Returns: + fert_rates (Numpy array): fertility rates for each year of data + and model age + fig (Matplotlib Figure): figure object if graph=True and plot_path=None + + """ + # initialize fert rates array + fert_rates_2D = np.zeros((end_year + 1 - start_year, totpers)) + # Read UN data, 1 year at a time + for y in range(start_year, end_year + 1): + df = get_un_data("68", country_id=country_id, start_year=y, end_year=y) + # put in vector + fert_rates = df.value.values + # fill in with zeros for ages < 15 and > 49 + # NOTE: this assumes min_year < 15 and max_age > 49 + fert_rates = np.append(fert_rates, np.zeros(max_age - 49)) + fert_rates = np.append(np.zeros(15 - min_age), fert_rates) + # divide by 1000 because fertility rates are number of births per + # 1000 woman and we want births per person (might update to account + # from fraction men more correctly - below assumes 50/50 men and women) + fert_rates = fert_rates / 2000 + # Rebin data in the case that model period not equal to one calendar + # year + fert_rates = pop_rebin(fert_rates, totpers) + fert_rates_2D[y - start_year, :] = fert_rates + + # Create plots if needed + if graph: + if plot_path: + pp.plot_fert_rates( + fert_rates_2D, + start_year, + [start_year, end_year], + path=plot_path, + ) + return fert_rates_2D + else: + fig = pp.plot_fert_rates( + fert_rates_2D, + start_year, + [start_year, end_year], + ) + return fert_rates_2D, fig + else: + return fert_rates_2D + + +def get_mort( + totpers=100, + min_age=0, + max_age=99, + country_id=UN_COUNTRY_CODE, + start_year=START_YEAR, + end_year=END_YEAR, + graph=False, + plot_path=None, +): + """ + This function generates a vector of mortality rates by model period + age. + + Args: + totpers (int): total number of agent life periods (E+S), >= 3 + min_age (int): age in years at which agents are born, >= 0 + max_age (int): age in years at which agents die with certainty, + >= 4, < 100 (max age in UN data is 99, 100+ i same group) + country_id (str): country id for UN data + start_year (int): start year for UN data + end_year (int): end year for UN data + graph (bool): =True if want graphical output + plot_path (str): path to save mortality rate plot + + Returns: + mort_rates (Numpy array) mortality rates for each year of data + and model age + infmort_rate_vec (Numpy array): infant mortality rates for each + fig (Matplotlib Figure): figure object if graph=True and plot_path=None + + """ + mort_rates_2D = np.zeros((end_year + 1 - start_year, totpers)) + infmort_rate_vec = np.zeros(end_year + 1 - start_year) + # Read UN data + for y in range(start_year, end_year + 1): + df = get_un_data("80", country_id=country_id, start_year=y, end_year=y) + # put in vector + mort_rates_data = df.value.values + # In UN data, mortality rates for 0 year olds are the infant + # mortality rates + infmort_rate = mort_rates_data[0] + # Rebin data in the case that model period not equal to one calendar + # year + # make mort rates those from age 1-100 and set to 1 for age 100 + mort_rates_data = np.append(mort_rates_data[1:], 1.0) + mort_rates = pop_rebin(mort_rates_data, totpers) + # put in 2D array + mort_rates_2D[y - start_year, :] = mort_rates + infmort_rate_vec[y - start_year] = infmort_rate + + # Create plots if needed + if graph: + if plot_path: + pp.plot_mort_rates_data( + mort_rates_2D, + start_year, + [start_year, end_year], + path=plot_path, + ) + return mort_rates_2D, infmort_rate_vec + else: + fig = pp.plot_mort_rates_data( + mort_rates_2D, + start_year, + [start_year, end_year], + ) + return mort_rates_2D, infmort_rate_vec, fig + else: + return mort_rates_2D, infmort_rate_vec + + +def get_pop( + E=20, + S=80, + min_age=0, + max_age=99, + country_id=UN_COUNTRY_CODE, + start_year=START_YEAR, + end_year=END_YEAR, +): + """ + Retrives the population distribution data from the UN data API + + Args: + E (int): number of model periods in which agent is not + economically active, >= 1 + S (int): number of model periods in which agent is economically + active, >= 3 + min_age (int): age in years at which agents are born, >= 0 + max_age (int): age in years at which agents die with certainty, + >= 4, < 100 (max age in UN data is 99, 100+ i same group) + country_id (str): country id for UN data + start_year (int): start year data + end_year (int): end year for data + + Returns: + pop_2D (Numpy array): population distribution over T0 periods + pre_pop (Numpy array): population distribution one year before + initial year for calibration of omega_S_preTP + """ + # Generate time path of the nonstationary population distribution + # Get path up to end of data year + pop_2D = np.zeros((end_year + 1 - start_year + 1, E + S)) + # Read UN data + for y in range(start_year, end_year + 2): + pop_data = get_un_data( + "47", + country_id=country_id, + start_year=y, + end_year=y, + ) + pop_data_sample = pop_data[ + (pop_data["age"] >= min_age) & (pop_data["age"] <= max_age) + ] + pop = pop_data_sample.value.values + # Generate the current population distribution given that E+S might + # be less than max_age-min_age+1 + # age_per_EpS = np.arange(1, E + S + 1) + pop_EpS = pop_rebin(pop, E + S) + pop_2D[y - start_year, :] = pop_EpS + # get population distribution one year before initial year for + # calibration of omega_S_preTP + pre_pop_data = get_un_data( + "47", + country_id=country_id, + start_year=start_year - 1, + end_year=start_year - 1, + ) + pre_pop_sample = pre_pop_data[ + (pre_pop_data["age"] >= min_age) & (pre_pop_data["age"] <= max_age) + ] + pre_pop = pre_pop_sample.value.values + + return pop_2D, pre_pop + + +def pop_rebin(curr_pop_dist, totpers_new): + """ + For cases in which totpers (E+S) is less than the number of periods + in the population distribution data, this function calculates a new + population distribution vector with totpers (E+S) elements. + + Args: + curr_pop_dist (Numpy array): population distribution over N + periods + totpers_new (int): number of periods to which we are + transforming the population distribution, >= 3 + + Returns: + curr_pop_new (Numpy array): new population distribution over + totpers (E+S) periods that approximates curr_pop_dist + + """ + # Number of periods in original data + assert totpers_new >= 3 + + # Number of periods in original data + totpers_orig = len(curr_pop_dist) + if int(totpers_new) == totpers_orig: + curr_pop_new = curr_pop_dist + elif int(totpers_new) < totpers_orig: + num_sub_bins = float(10000) + curr_pop_sub = np.repeat( + np.float64(curr_pop_dist) / num_sub_bins, num_sub_bins + ) + len_subbins = (np.float64(totpers_orig * num_sub_bins)) / totpers_new + curr_pop_new = np.zeros(totpers_new, dtype=np.float64) + end_sub_bin = 0 + for i in range(totpers_new): + beg_sub_bin = int(end_sub_bin) + end_sub_bin = int(np.rint((i + 1) * len_subbins)) + curr_pop_new[i] = curr_pop_sub[beg_sub_bin:end_sub_bin].sum() + # Return curr_pop_new to single precision float (float32) + # datatype + curr_pop_new = np.float32(curr_pop_new) + + return curr_pop_new + + +def get_imm_rates( + totpers=100, + min_age=0, + max_age=99, + fert_rates=None, + mort_rates=None, + infmort_rates=None, + pop_dist=None, + country_id=UN_COUNTRY_CODE, + start_year=START_YEAR, + end_year=END_YEAR, + graph=False, + plot_path=None, +): + """ + Calculate immigration rates by age as a residual given population + levels in different periods, then output average calculated + immigration rate. We have to replace the first mortality rate in + this function in order to adjust the first implied immigration rate + + Args: + totpers (int): total number of agent life periods (E+S), >= 3 + min_age (int): age in years at which agents are born, >= 0 + max_age (int): age in years at which agents die with certainty, + >= 4 + fert_rates (Numpy array): fertility rates for each year of data + and model age + mort_rates (Numpy array): mortality rates for each year of data + and model age + infmort_rates (Numpy array): infant mortality rates for each year + of data + pop_dist (Numpy array): population distribution over T0+1 periods + country_id (str): country id for UN data + start_year (int): start year for UN data + end_year (int): end year for UN data + graph (bool): =True if want graphical output + plot_path (str): path to save figure to + + Returns: + imm_rates_2D (Numpy array):immigration rates that correspond to + each year of data and period of life, length E+S + + """ + imm_rates_2D = np.zeros((end_year + 1 - start_year, totpers)) + if fert_rates is None: + # get fert rates from UN data from initial year to data year + fert_rates = get_fert( + totpers, min_age, max_age, country_id, start_year, end_year + ) + else: + # ensure that user provided fert_rates and mort rates of same size + assert fert_rates.shape == mort_rates.shape + if mort_rates is None: + # get mort rates from UN data from initial year to data year + mort_rates, infmort_rates = get_mort( + totpers, min_age, max_age, country_id, start_year, end_year + ) + else: + # ensure that user provided fert_rates and mort rates of same size + assert fert_rates.shape == mort_rates.shape + assert infmort_rates is not None + assert infmort_rates.shape[0] == mort_rates.shape[0] + # Read UN data + for y in range(start_year, end_year + 1): + if pop_dist is None: + # need to read UN population data by age for each year + df = get_un_data( + "47", country_id=country_id, start_year=y, end_year=y + ) + pop_t = df[(df.age < 100) & (df.age >= 0)].value.values + pop_t = pop_rebin(pop_t, totpers) + df = get_un_data( + "47", country_id=country_id, start_year=y + 1, end_year=y + 1 + ) + pop_tp1 = df[(df.age < 100) & (df.age >= 0)].value.values + pop_tp1 = pop_rebin(pop_tp1, totpers) + else: + # Make sure shape conforms + assert pop_dist.shape[1] == mort_rates.shape[1] + pop_t = pop_dist[y - start_year, :] + pop_tp1 = pop_dist[y - start_year + 1, :] + # initialize imm_rate vector + imm_rates = np.zeros(totpers) + # back out imm rates by age for each year + newborns = np.dot(fert_rates[y - start_year, :], pop_t) + # new born imm_rate + imm_rates[0] = ( + pop_tp1[0] - (1 - infmort_rates[y - start_year]) * newborns + ) / pop_t[0] + # all other age imm_rates + imm_rates[1:] = ( + pop_tp1[1:] - (1 - mort_rates[y - start_year, :-1]) * pop_t[:-1] + ) / pop_t[1:] + + imm_rates_2D[y - start_year, :] = imm_rates + + # Create plots if needed + if graph: + if plot_path: + pp.plot_imm_rates( + imm_rates_2D, + start_year, + [start_year, end_year], + path=plot_path, + ) + return imm_rates_2D + else: + fig = pp.plot_imm_rates( + imm_rates_2D, + start_year, + [start_year, end_year], + ) + return imm_rates_2D, fig + else: + return imm_rates_2D + + +def immsolve(imm_rates, *args): + """ + This function generates a vector of errors representing the + difference in two consecutive periods stationary population + distributions. This vector of differences is the zero-function + objective used to solve for the immigration rates vector, similar to + the original immigration rates vector from get_imm_rates(), that + sets the steady-state population distribution by age equal to the + population distribution in period int(1.5*S) + + Args: + imm_rates (Numpy array):immigration rates that correspond to + each period of life, length E+S + args (tuple): (fert_rates, mort_rates, infmort_rates, omega_cur, + g_n_SS) + + Returns: + omega_errs (Numpy array): difference between omega_new and + omega_cur_pct, length E+S + + """ + fert_rates, mort_rates, infmort_rates, omega_cur_lev, g_n_SS = args + omega_cur_pct = omega_cur_lev / omega_cur_lev.sum() + totpers = len(fert_rates) + OMEGA = np.zeros((totpers, totpers)) + OMEGA[0, :] = (1 - infmort_rates) * fert_rates + np.hstack( + (imm_rates[0], np.zeros(totpers - 1)) + ) + OMEGA[1:, :-1] += np.diag(1 - mort_rates[:-1]) + OMEGA[1:, 1:] += np.diag(imm_rates[1:]) + omega_new = np.dot(OMEGA, omega_cur_pct) / (1 + g_n_SS) + omega_errs = omega_new - omega_cur_pct + + return omega_errs + + +def get_pop_objs( + E=20, + S=80, + T=320, + min_age=0, + max_age=99, + fert_rates=None, + mort_rates=None, + infmort_rates=None, + imm_rates=None, + pop_dist=None, + pre_pop_dist=None, + country_id=UN_COUNTRY_CODE, + initial_data_year=START_YEAR - 1, + final_data_year=START_YEAR + 2, # as default data year goes until T1 + GraphDiag=True, +): + """ + This function produces the demographics objects to be used in the + OG-USA model package. + + Args: + E (int): number of model periods in which agent is not + economically active, >= 1 + S (int): number of model periods in which agent is economically + active, >= 3 + T (int): number of periods to be simulated in TPI, > 2*S + min_age (int): age in years at which agents are born, >= 0 + max_age (int): age in years at which agents die with certainty, + >= 4, < 100 (max age in UN data is 99, 100+ i same group) + fert_rates (array_like): user provided fertility rates, dimensions + are T0 x E+S + mort_rates (array_like): user provided mortality rates, dimensions + are T0 x E+S + infmort_rates (array_like): user provided infant mortality rates, + length T0 + imm_rates (array_like): user provided immigration rates, dimensions + are T0 x E+S + pop_dist (array_like): user provided population distribution, + dimensions are T0+1 x E+S + pre_pop_dist (array_like): user provided population distribution + for the year before the initial year for calibration, + length E+S + country_id (str): country id for UN data + initial_data_year (int): initial year of data to use + (not relevant if have user provided data) + final_data_year (int): final year of data to use, + T0=intial_year-final_year + 1 + pop_dist (array_like): user provided population distribution, last + dimension is of length E+S + GraphDiag (bool): =True if want graphical output and printed + diagnostics + + Returns: + pop_dict (dict): includes: + omega_path_S (Numpy array), time path of the population + distribution from the current state to the steady-state, + size T+S x S + g_n_SS (scalar): steady-state population growth rate + omega_SS (Numpy array): normalized steady-state population + distribution, length S + surv_rates (Numpy array): survival rates that correspond to + each model period of life, length S + mort_rates (Numpy array): mortality rates that correspond to + each model period of life, length S + g_n_path (Numpy array): population growth rates over the time + path, length T + S + + """ + # TODO: this function does not generalize with T. + # It assumes one model period is equal to one calendar year in the + # time dimesion (it does adjust for S, however) + T0 = ( + final_data_year - initial_data_year + 1 + ) # number of periods until constant fertility and mortality rates + print( + "Demographics data: Initial Data year = ", + initial_data_year, + ", Final Data year = ", + final_data_year, + ) + assert E + S <= max_age - min_age + 1 + assert initial_data_year >= 2011 and initial_data_year <= 2100 + assert final_data_year >= 2011 and final_data_year <= 2100 + # Ensure that the last year of data used is before SS transition assumed + # Really, it will need to be well before this + assert final_data_year > initial_data_year + assert final_data_year < initial_data_year + T + assert ( + T > 2 * T0 + ) # ensure time path 2x as long as allows rates to fluctuate + + # Get fertility rates if not provided + if fert_rates is None: + # get fert rates from UN data from initial year to data year + fert_rates = get_fert( + E + S, + min_age, + max_age, + country_id, + initial_data_year, + final_data_year, + ) + else: + # ensure that user provided fert_rates are of the correct shape + assert fert_rates.shape[0] == T0 + assert fert_rates.shape[-1] == E + S + # Extrapolate fertility rates for the rest of the transition path + # the implicit assumption is that they are constant after the + # last year of UN or user provided data + fert_rates = np.concatenate( + ( + fert_rates, + np.tile( + fert_rates[-1, :].reshape(1, E + S), + (T - fert_rates.shape[0], 1), + ), + ), + axis=0, + ) + # Get mortality rates if not provided + if mort_rates is None: + # get mort rates from UN data from initial year to data year + mort_rates, infmort_rates = get_mort( + E + S, + min_age, + max_age, + country_id, + initial_data_year, + final_data_year, + ) + else: + # ensure that user provided mort_rates are of the correct shape + assert mort_rates.shape[0] == T0 + assert mort_rates.shape[-1] == E + S + assert infmort_rates is not None + assert infmort_rates.shape[0] == mort_rates.shape[0] + # Extrapolate mortality rates for the rest of the transition path + # the implicit assumption is that they are constant after the + # last year of UN or user provided data + mort_rates = np.concatenate( + ( + mort_rates, + np.tile( + mort_rates[-1, :].reshape(1, E + S), + (T - mort_rates.shape[0], 1), + ), + ), + axis=0, + ) + infmort_rates = np.concatenate( + ( + infmort_rates, + np.tile(infmort_rates[-1], (T - infmort_rates.shape[0])), + ) + ) + mort_rates_S = mort_rates[:, E:] + # Get population distribution if not provided + if pop_dist is None: + pop_2D, pre_pop = get_pop( + E, + S, + min_age, + max_age, + country_id, + initial_data_year, + final_data_year, + ) + else: + # Check first dims of pop_dist as input by user + assert pop_dist.shape[0] == T0 + 1 # population needs to be + # one year longer in order to find immigration rates + assert pop_dist.shape[-1] == E + S + # Check that pre_pop specified + assert pre_pop_dist is not None + assert pre_pop_dist.shape[0] == pop_dist.shape[1] + pre_pop = pre_pop_dist + # Create 2D array of population distribution + pop_2D = np.zeros((T0 + 1, E + S)) + for t in range(T0 + 1): + pop_EpS = pop_rebin(pop_dist[t, :], E + S) + pop_2D[t, :] = pop_EpS + # Get percentage distribution for S periods for pre-TP period + pre_pop_EpS = pop_rebin(pre_pop, E + S) + # Get immigration rates if not provided + if imm_rates is None: + imm_rates_orig = get_imm_rates( + E + S, + min_age, + max_age, + fert_rates, + mort_rates, + infmort_rates, + pop_2D, + country_id, + initial_data_year, + final_data_year, + ) + else: + # ensure that user provided imm_rates are of the correct shape + assert imm_rates.shape[0] == T0 + assert imm_rates.shape[-1] == E + S + imm_rates_orig = imm_rates + # Extrapolate immigration rates for the rest of the transition path + # the implicit assumption is that they are constant after the + # last year of UN or user provided data + imm_rates_orig = np.concatenate( + ( + imm_rates_orig, + np.tile( + imm_rates_orig[-1, :].reshape(1, E + S), + (T - imm_rates_orig.shape[0], 1), + ), + ), + axis=0, + ) + # If the population distribution was given, check it for consistency + # with the fertility, mortality, and immigration rates + if pop_dist is not None: + len_pop_dist = pop_dist.shape[0] + pop_counter_2D = np.zeros((len_pop_dist, E + S)) + # set initial population distribution in the counterfactual to + # the first year of the user provided distribution + pop_counter_2D[0, :] = pop_dist[0, :] + for t in range(1, len_pop_dist): + # find newborns next period + newborns = np.dot(fert_rates[t - 1, :], pop_counter_2D[t - 1, :]) + + pop_counter_2D[t, 0] = ( + 1 - infmort_rates[t - 1] + ) * newborns + imm_rates[t - 1, 0] * pop_counter_2D[t - 1, 0] + pop_counter_2D[t, 1:] = ( + pop_counter_2D[t - 1, :-1] * (1 - mort_rates[t - 1, :-1]) + + pop_counter_2D[t - 1, 1:] * imm_rates_orig[t - 1, 1:] + ) + # Check that counterfactual pop dist is close to pop dist given + assert np.allclose(pop_counter_2D, pop_dist) + # Create the transition matrix for the population distribution + # from T0 going forward (i.e., past when we have data on forecasts) + OMEGA_orig = np.zeros((E + S, E + S)) + OMEGA_orig[0, :] = (1 - infmort_rates[-1]) * fert_rates[-1, :] + np.hstack( + (imm_rates_orig[-1, 0], np.zeros(E + S - 1)) + ) + OMEGA_orig[1:, :-1] += np.diag(1 - mort_rates[-1, :-1]) + OMEGA_orig[1:, 1:] += np.diag(imm_rates_orig[-1, 1:]) + + # Solve for steady-state population growth rate and steady-state + # population distribution by age using eigenvalue and eigenvector + # decomposition + eigvalues, eigvectors = np.linalg.eig(OMEGA_orig) + g_n_SS = (eigvalues[np.isreal(eigvalues)].real).max() - 1 + eigvec_raw = eigvectors[ + :, (eigvalues[np.isreal(eigvalues)].real).argmax() + ].real + omega_SS_orig = eigvec_raw / eigvec_raw.sum() + + # Generate time path of the population distribution after final + # year of data + omega_path_lev = np.zeros((T + S, E + S)) + pop_curr = pop_2D[T0 - 1, :] + omega_path_lev[:T0, :] = pop_2D[:T0, :] + for per in range(T0, T + S): + pop_next = np.dot(OMEGA_orig, pop_curr) + omega_path_lev[per, :] = pop_next.copy() + pop_curr = pop_next.copy() + + # Force the population distribution after 1.5*S periods to be the + # steady-state distribution by adjusting immigration rates, holding + # constant mortality, fertility, and SS growth rates + imm_tol = 1e-14 + fixper = int(1.5 * S + T0) + omega_SSfx = omega_path_lev[fixper, :] / omega_path_lev[fixper, :].sum() + imm_objs = ( + fert_rates[fixper, :], + mort_rates[fixper, :], + infmort_rates[fixper], + omega_path_lev[fixper, :], + g_n_SS, + ) + imm_fulloutput = opt.fsolve( + immsolve, + imm_rates_orig[fixper, :], + args=(imm_objs), + full_output=True, + xtol=imm_tol, + ) + imm_rates_adj = imm_fulloutput[0] + imm_diagdict = imm_fulloutput[1] + omega_path_S = omega_path_lev[:, -S:] / ( + omega_path_lev[:, -S:].sum(axis=1).reshape((T + S, 1)) + ) + omega_path_S[fixper:, :] = np.tile( + omega_path_S[fixper, :].reshape((1, S)), (T + S - fixper, 1) + ) + g_n_path = np.zeros(T + S) + g_n_path[1:] = ( + omega_path_lev[1:, -S:].sum(axis=1) + - omega_path_lev[:-1, -S:].sum(axis=1) + ) / omega_path_lev[:-1, -S:].sum(axis=1) + g_n_path[0] = ( + omega_path_lev[0, -S:].sum() - pre_pop_EpS[-S:].sum() + ) / pre_pop_EpS[-S:].sum() + g_n_path[fixper + 1 :] = g_n_SS + omega_S_preTP = pre_pop_EpS[-S:] / pre_pop_EpS[-S:].sum() + imm_rates_mat = np.concatenate( + ( + imm_rates_orig[:fixper, E:], + np.tile( + imm_rates_adj[E:].reshape(1, S), + (T - fixper, 1), + ), + ), + axis=0, + ) + + if GraphDiag: + # Check whether original SS population distribution is close to + # the period-T population distribution + omegaSSmaxdif = np.absolute( + omega_SS_orig - (omega_path_lev[T, :] / omega_path_lev[T, :].sum()) + ).max() + if omegaSSmaxdif > 0.0003: + print( + "POP. WARNING: Max. abs. dist. between original SS " + + "pop. dist'n and period-T pop. dist'n is greater than" + + " 0.0003. It is " + + str(omegaSSmaxdif) + + "." + ) + else: + print( + "POP. SUCCESS: orig. SS pop. dist is very close to " + + "period-T pop. dist'n. The maximum absolute " + + "difference is " + + str(omegaSSmaxdif) + + "." + ) + + # Plot the adjusted steady-state population distribution versus + # the original population distribution. The difference should be + # small + omegaSSvTmaxdiff = np.absolute(omega_SS_orig - omega_SSfx).max() + if omegaSSvTmaxdiff > 0.0003: + print( + "POP. WARNING: The maximum absolute difference " + + "between any two corresponding points in the original" + + " and adjusted steady-state population " + + "distributions is" + + str(omegaSSvTmaxdiff) + + ", " + + "which is greater than 0.0003." + ) + else: + print( + "POP. SUCCESS: The maximum absolute difference " + + "between any two corresponding points in the original" + + " and adjusted steady-state population " + + "distributions is " + + str(omegaSSvTmaxdiff) + ) + + # Print whether or not the adjusted immigration rates solved the + # zero condition + immtol_solved = np.absolute(imm_diagdict["fvec"].max()) < imm_tol + if immtol_solved: + print( + "POP. SUCCESS: Adjusted immigration rates solved " + + "with maximum absolute error of " + + str(np.absolute(imm_diagdict["fvec"].max())) + + ", which is less than the tolerance of " + + str(imm_tol) + ) + else: + print( + "POP. WARNING: Adjusted immigration rates did not " + + "solve. Maximum absolute error of " + + str(np.absolute(imm_diagdict["fvec"].max())) + + " is greater than the tolerance of " + + str(imm_tol) + ) + + # Test whether the steady-state growth rates implied by the + # adjusted OMEGA matrix equals the steady-state growth rate of + # the original OMEGA matrix + OMEGA2 = np.zeros((E + S, E + S)) + OMEGA2[0, :] = (1 - infmort_rates[-1]) * fert_rates[-1, :] + np.hstack( + (imm_rates_adj[0], np.zeros(E + S - 1)) + ) + OMEGA2[1:, :-1] += np.diag(1 - mort_rates[-1, :-1]) + OMEGA2[1:, 1:] += np.diag(imm_rates_adj[1:]) + eigvalues2, eigvectors2 = np.linalg.eig(OMEGA2) + g_n_SS_adj = (eigvalues[np.isreal(eigvalues2)].real).max() - 1 + if np.max(np.absolute(g_n_SS_adj - g_n_SS)) > 10 ** (-8): + print( + "FAILURE: The steady-state population growth rate" + + " from adjusted OMEGA is different (diff is " + + str(g_n_SS_adj - g_n_SS) + + ") than the steady-" + + "state population growth rate from the original" + + " OMEGA." + ) + elif np.max(np.absolute(g_n_SS_adj - g_n_SS)) <= 10 ** (-8): + print( + "SUCCESS: The steady-state population growth rate" + + " from adjusted OMEGA is close to (diff is " + + str(g_n_SS_adj - g_n_SS) + + ") the steady-" + + "state population growth rate from the original" + + " OMEGA." + ) + + # Do another test of the adjusted immigration rates. Create the + # new OMEGA matrix implied by the new immigration rates. Plug in + # the adjusted steady-state population distribution. Hit is with + # the new OMEGA transition matrix and it should return the new + # steady-state population distribution + omega_new = np.dot(OMEGA2, omega_SSfx) + omega_errs = np.absolute(omega_new - omega_SSfx) + print( + "The maximum absolute difference between the adjusted " + + "steady-state population distribution and the " + + "distribution generated by hitting the adjusted OMEGA " + + "transition matrix is " + + str(omega_errs.max()) + ) + + # Plot the original immigration rates versus the adjusted + # immigration rates + immratesmaxdiff = np.absolute(imm_rates_orig - imm_rates_adj).max() + print( + "The maximum absolute distance between any two points " + + "of the original immigration rates and adjusted " + + "immigration rates is " + + str(immratesmaxdiff) + ) + + # plots + age_per_EpS = np.arange(1, E + S + 1) + pp.plot_omega_fixed( + age_per_EpS, omega_SS_orig, omega_SSfx, E, S, path=OUTPUT_DIR + ) + pp.plot_imm_fixed( + age_per_EpS, + imm_rates_orig[fixper - 1, :], + imm_rates_adj, + E, + S, + path=OUTPUT_DIR, + ) + pp.plot_population_path( + age_per_EpS, + omega_path_lev, + omega_SSfx, + initial_data_year, + initial_data_year, + initial_data_year, + S, + path=OUTPUT_DIR, + ) + + # return omega_path_S, g_n_SS, omega_SSfx, survival rates, + # mort_rates_S, and g_n_path + pop_dict = { + "omega": omega_path_S, + "g_n_ss": g_n_SS, + "omega_SS": omega_SSfx[-S:] / omega_SSfx[-S:].sum(), + "rho": [mort_rates_S], + "g_n": g_n_path, + "imm_rates": imm_rates_mat, + "omega_S_preTP": omega_S_preTP, + } + + return pop_dict diff --git a/ogcore/elliptical_u_est.py b/ogcore/elliptical_u_est.py index ad541405b..2779f4166 100644 --- a/ogcore/elliptical_u_est.py +++ b/ogcore/elliptical_u_est.py @@ -5,6 +5,7 @@ constant Frisch elasticity function with the input Frisch elasticity. ------------------------------------------------------------------------ """ + # Import packages import numpy as np import scipy.optimize as opt diff --git a/ogcore/execute.py b/ogcore/execute.py index 5bcadc6c0..f05e480c5 100644 --- a/ogcore/execute.py +++ b/ogcore/execute.py @@ -1,6 +1,7 @@ """ This module defines the runner() function, which is used to run OG-Core """ + import pickle import cloudpickle import os diff --git a/ogcore/firm.py b/ogcore/firm.py index d134f5528..5d454f679 100644 --- a/ogcore/firm.py +++ b/ogcore/firm.py @@ -96,12 +96,7 @@ def get_Y(K, K_g, L, p, method, m=-1): * (L ** ((epsilon - 1) / epsilon)) ) ) ** (epsilon / (epsilon - 1)) - Y2 = ( - Z - * (K**gamma) - * (K_g**gamma_g) - * (L ** (1 - gamma - gamma_g)) - ) + Y2 = Z * (K**gamma) * (K_g**gamma_g) * (L ** (1 - gamma - gamma_g)) Y[epsilon == 1] = Y2[epsilon == 1] else: # TPI case if m is not None: @@ -157,12 +152,7 @@ def get_Y(K, K_g, L, p, method, m=-1): * (L ** ((epsilon - 1) / epsilon)) ) ) ** (epsilon / (epsilon - 1)) - Y2 = ( - Z - * (K**gamma) - * (K_g**gamma_g) - * (L ** (1 - gamma - gamma_g)) - ) + Y2 = Z * (K**gamma) * (K_g**gamma_g) * (L ** (1 - gamma - gamma_g)) Y[:, epsilon == 1] = Y2[:, epsilon == 1] return Y @@ -463,9 +453,7 @@ def get_L_from_Y(w, Y, p, method): Z = p.Z[-1] else: Z = p.Z[: p.T] - L = ((1 - p.gamma - p.gamma_g) * Z ** (p.epsilon - 1) * Y) / ( - w**p.epsilon - ) + L = ((1 - p.gamma - p.gamma_g) * Z ** (p.epsilon - 1) * Y) / (w**p.epsilon) return L @@ -666,9 +654,7 @@ def solve_L(Y, K, K_g, p, method, m=-1): K_g[K_g == 0] = 1.0 gamma_g = 0 if epsilon == 1.0: - L = (Y / (Z * K**gamma * K_g**gamma_g)) ** ( - 1 / (1 - gamma - gamma_g) - ) + L = (Y / (Z * K**gamma * K_g**gamma_g)) ** (1 / (1 - gamma - gamma_g)) else: L = ( ( diff --git a/ogcore/parameter_plots.py b/ogcore/parameter_plots.py index a5c79277e..df188f652 100644 --- a/ogcore/parameter_plots.py +++ b/ogcore/parameter_plots.py @@ -3,41 +3,64 @@ import os import matplotlib.pyplot as plt import matplotlib +from cycler import cycler from ogcore.constants import GROUP_LABELS from ogcore import utils, txfunc from ogcore.constants import DEFAULT_START_YEAR, VAR_LABELS -def plot_imm_rates(p, year=DEFAULT_START_YEAR, include_title=False, path=None): +def plot_imm_rates( + imm_rates, + start_year=DEFAULT_START_YEAR, + years_to_plot=[DEFAULT_START_YEAR], + include_title=False, + source="United Nations, World Population Prospects", + path=None, +): """ - Create a plot of immigration rates from OG-Core parameterization. + Plot fertility rates from the data Args: - p (OG-Core Specifications class): parameters object - year (integer): year of mortality ratese to plot - include_title (bool): whether to include a title in the plot - path (string): path to save figure to + imm_rates (NumPy array): immigration rates for each of + totpers + start_year (int): first year of data + years_to_plot (list): list of years to plot + source (str): data source for fertility rates + path (str): path to save figure to, if None then figure + is returned Returns: - fig (Matplotlib plot object): plot of immigration rates + fig (Matplotlib plot object): plot of fertility rates """ - assert isinstance(year, int) - age_per = np.linspace(p.E, p.E + p.S, p.S) + # create line styles to cycle through + plt.rc("axes", prop_cycle=(cycler("linestyle", [":", "-.", "-", "--"]))) fig, ax = plt.subplots() - plt.scatter(age_per, p.imm_rates[year - p.start_year, :], s=40, marker="d") - plt.plot(age_per, p.imm_rates[year - p.start_year, :]) - plt.xlabel(r"Age $s$ (model periods)") - plt.ylabel(r"Imm. rate $i_{s}$") - vals = ax.get_yticks() - ax.set_yticklabels(["{:,.2%}".format(x) for x in vals]) + for y in years_to_plot: + i = start_year - y + plt.plot(imm_rates[i, :], c="blue", label="Year " + str(y)) + # plt.title('Fertility rates by age ($f_{s}$)', + # fontsize=20) + plt.xlabel(r"Age $s$") + plt.ylabel(r"Immigration rate $i_{s}$") + plt.legend(loc="upper left") + plt.text( + -5, + -0.023, + "Source: " + source, + fontsize=9, + ) + plt.tight_layout(rect=(0, 0.035, 1, 1)) if include_title: - plt.title("Immigration Rates in " + str(year)) - if path is None: - return fig + plt.title("Immigration Rates") + # Save or return figure + if path: + output_path = os.path.join(path, "imm_rates") + plt.savefig(output_path, dpi=300) + plt.close() else: - fig_path = os.path.join(path, "imm_rates_orig") - plt.savefig(fig_path, dpi=300) + fig.show() + return fig def plot_mort_rates( @@ -271,60 +294,35 @@ def plot_chi_n(p, include_title=False, path=None): def plot_fert_rates( - fert_func, - age_midp, - totpers, - min_yr, - max_yr, - fert_data, fert_rates, - output_dir=None, + start_year=DEFAULT_START_YEAR, + years_to_plot=[DEFAULT_START_YEAR], + source="United Nations, World Population Prospects", + path=None, ): """ - Plot fertility rates from the data along with smoothed function to - use for model fertility rates. + Plot fertility rates from the data Args: - fert_func (Scipy interpolation object): interpolated fertility - rates - age_midp (NumPy array): midpoint of age for each age group in - data - totpers (int): total number of agent life periods (E+S), >= 3 - min_yr (int): age in years at which agents are born, >= 0 - max_yr (int): age in years at which agents die with certainty, - >= 4 - fert_data (NumPy array): fertility rates by age group from data - fert_rates (NumPy array): fitted fertility rates for each of + fert_rates (NumPy array): fertility rates for each of totpers - output_dir (str): path to save figure to, if None then figure + start_year (int): first year of data + years_to_plot (list): list of years to plot + source (str): data source for fertility rates + path (str): path to save figure to, if None then figure is returned Returns: fig (Matplotlib plot object): plot of fertility rates """ - # Generate finer age vector and fertility rate vector for - # graphing cubic spline interpolating function - age_fine_pred = np.linspace(age_midp[0], age_midp[-1], 300) - fert_fine_pred = fert_func(age_fine_pred) - age_fine = np.hstack((min_yr, age_fine_pred, max_yr)) - fert_fine = np.hstack((0, fert_fine_pred, 0)) - age_mid_new = np.linspace( - np.float64(max_yr) / totpers, max_yr, totpers - ) - (0.5 * np.float64(max_yr) / totpers) - + # create line styles to cycle through + plt.rc("axes", prop_cycle=(cycler("linestyle", [":", "-.", "-", "--"]))) fig, ax = plt.subplots() - plt.scatter(age_midp, fert_data, s=70, c="blue", marker="o", label="Data") - plt.scatter( - age_mid_new, - fert_rates, - s=40, - c="red", - marker="d", - label="Model period (integrated)", - ) - plt.plot(age_fine, fert_fine, label="Cubic spline") - # plt.title('Fitted fertility rate function by age ($f_{s}$)', + for y in years_to_plot: + i = start_year - y + plt.plot(fert_rates[i, :], c="blue", label="Year " + str(y)) + # plt.title('Fertility rates by age ($f_{s}$)', # fontsize=20) plt.xlabel(r"Age $s$") plt.ylabel(r"Fertility rate $f_{s}$") @@ -332,102 +330,72 @@ def plot_fert_rates( plt.text( -5, -0.023, - "Source: National Vital Statistics Reports, " - + "Volume 64, Number 1, January 15, 2015.", + "Source: " + source, fontsize=9, ) plt.tight_layout(rect=(0, 0.035, 1, 1)) # Save or return figure - if output_dir: - output_path = os.path.join(output_dir, "fert_rates") + if path: + output_path = os.path.join(path, "fert_rates") plt.savefig(output_path, dpi=300) plt.close() else: + fig.show() return fig def plot_mort_rates_data( - totpers, - min_yr, - max_yr, - age_year_all, - mort_rates_all, - infmort_rate, mort_rates, - output_dir=None, + start_year=DEFAULT_START_YEAR, + years_to_plot=[DEFAULT_START_YEAR], + source="United Nations, World Population Prospects", + path=None, ): """ - Plots mortality rates from the model and data. + Plots mortality rates from the data. Args: - totpers (int): total number of agent life periods (E+S), >= 3 - min_yr (int): age in years at which agents are born, >= 0 - max_yr (int): age in years at which agents die with certainty, - >= 4 - age_year_all (array_like): ages in mortality rate data - mort_rates_all (array_like): mortality rates by age from data, - average across males and females - infmort_rate (scalar): infant mortality rate - mort_rates (array_like): fitted mortality rates for each of + mort_rates (array_like): mortality rates for each of totpers - output_dir (str): path to save figure to, if None then figure + start_year (int): first year of data + years_to_plot (list): list of years to plot + source (str): data source for fertility rates + path (str): path to save figure to, if None then figure is returned Returns: fig (Matplotlib plot object): plot of mortality rates """ - age_mid_new = np.linspace( - np.float64(max_yr) / totpers, max_yr, totpers - ) - (0.5 * np.float64(max_yr) / totpers) + # create line styles to cycle through + plt.rc("axes", prop_cycle=(cycler("linestyle", [":", "-.", "-", "--"]))) fig, ax = plt.subplots() - plt.scatter( - np.hstack([0, age_year_all]), - np.hstack([infmort_rate, mort_rates_all]), - s=20, - c="blue", - marker="o", - label="Data", - ) - plt.scatter( - np.hstack([0, age_mid_new]), - np.hstack([infmort_rate, mort_rates]), - s=40, - c="red", - marker="d", - label="Model period (cumulative)", - ) - plt.plot( - np.hstack([0, age_year_all[min_yr - 1 : max_yr]]), - np.hstack([infmort_rate, mort_rates_all[min_yr - 1 : max_yr]]), - ) - plt.axvline(x=max_yr, color="red", linestyle="-", linewidth=1) - plt.grid(visible=True, which="major", color="0.65", linestyle="-") - # plt.title('Fitted mortality rate function by age ($rho_{s}$)', + for y in years_to_plot: + i = start_year - y + plt.plot(mort_rates[i, :], c="blue", label="Year " + str(y)) + # plt.title('Fertility rates by age ($f_{s}$)', # fontsize=20) plt.xlabel(r"Age $s$") - plt.ylabel(r"Mortality rate $\rho_{s}$") + plt.ylabel(r"Mortality rate $rho_{s}$") plt.legend(loc="upper left") plt.text( -5, - -0.2, - "Source: Actuarial Life table, 2011 Social Security " - + "Administration.", + -0.223, + "Source: " + source, fontsize=9, ) - plt.tight_layout(rect=(0, 0.03, 1, 1)) + plt.tight_layout(rect=(0, 0.035, 1, 1)) # Save or return figure - if output_dir: - output_path = os.path.join(output_dir, "mort_rates") + if path: + output_path = os.path.join(path, "mort_rates") plt.savefig(output_path, dpi=300) plt.close() else: + fig.show() return fig -def plot_omega_fixed( - age_per_EpS, omega_SS_orig, omega_SSfx, E, S, output_dir=None -): +def plot_omega_fixed(age_per_EpS, omega_SS_orig, omega_SSfx, E, S, path=None): """ Plot the steady-state population distribution implied by the data on fertility and mortality rates versus the the steady-state @@ -444,7 +412,7 @@ def plot_omega_fixed( after adjustment to immigration rates E (int): age at which household becomes economically active S (int): number of years which household is economically active - output_dir (str): path to save figure to, if None then figure + path (str): path to save figure to, if None then figure is returned Returns: @@ -461,8 +429,8 @@ def plot_omega_fixed( plt.xlim((0, E + S + 1)) plt.legend(loc="upper right") # Save or return figure - if output_dir: - output_path = os.path.join(output_dir, "OrigVsFixSSpop") + if path: + output_path = os.path.join(path, "OrigVsFixSSpop") plt.savefig(output_path, dpi=300) plt.close() else: @@ -470,7 +438,7 @@ def plot_omega_fixed( def plot_imm_fixed( - age_per_EpS, imm_rates_orig, imm_rates_adj, E, S, output_dir=None + age_per_EpS, imm_rates_orig, imm_rates_adj, E, S, path=None ): """ Plot the immigration rates implied by the data on population, @@ -485,7 +453,7 @@ def plot_imm_fixed( imm_rates_adj (Numpy array): adjusted immigration rates by age E (int): age at which household becomes economically active S (int): number of years which household is economically active - output_dir (str): path to save figure to, if None then figure + path (str): path to save figure to, if None then figure is returned Returns: @@ -502,8 +470,8 @@ def plot_imm_fixed( plt.xlim((0, E + S + 1)) plt.legend(loc="upper center") # Save or return figure - if output_dir: - output_path = os.path.join(output_dir, "OrigVsAdjImm") + if path: + output_path = os.path.join(path, "OrigVsAdjImm") plt.savefig(output_path, dpi=300) plt.close() else: @@ -512,13 +480,13 @@ def plot_imm_fixed( def plot_population_path( age_per_EpS, - initial_pop_pct, omega_path_lev, omega_SSfx, - data_year, - curr_year, + start_year, + year1, + year2, S, - output_dir=None, + path=None, ): """ Plot the distribution of the population over age for various years. @@ -526,15 +494,17 @@ def plot_population_path( Args: age_per_EpS (array_like): list of ages over which to plot population distribution - pop_2013_pct (array_like): population distribution in 2013 + initial_pop_pct (array_like): initial year population distribution omega_path_lev (Numpy array): number of households by age over the transition path omega_SSfx (Numpy array): number of households by age in the SS - data_year (int): year of data for initial_pop_pct - curr_year (int): current year in the model + start_year (int): first year of data (so can get index of year1 + and year2) + year1 (int): first year of data to plot + year2 (int): second year of data to plot S (int): number of years which household is economically active - output_dir (str): path to save figure to, if None then figure + path (str): path to save figure to, if None then figure is returned Returns: @@ -543,23 +513,33 @@ def plot_population_path( """ fig, ax = plt.subplots() - plt.plot(age_per_EpS, initial_pop_pct, label=str(data_year) + " pop.") plt.plot( age_per_EpS, - (omega_path_lev[:, 0] / omega_path_lev[:, 0].sum()), - label=str(curr_year) + " pop.", + ( + omega_path_lev[start_year - year1, :] + / omega_path_lev[start_year - year1, :].sum() + ), + label=str(year1) + " pop.", + ) + plt.plot( + age_per_EpS, + ( + omega_path_lev[start_year - year2, :] + / omega_path_lev[start_year - year2, :].sum() + ), + label=str(year2) + " pop.", ) plt.plot( age_per_EpS, ( - omega_path_lev[:, int(0.5 * S)] - / omega_path_lev[:, int(0.5 * S)].sum() + omega_path_lev[int(0.5 * S), :] + / omega_path_lev[int(0.5 * S), :].sum() ), label="T=" + str(int(0.5 * S)) + " pop.", ) plt.plot( age_per_EpS, - (omega_path_lev[:, int(S)] / omega_path_lev[:, int(S)].sum()), + (omega_path_lev[int(S), :] / omega_path_lev[int(S), :].sum()), label="T=" + str(int(S)) + " pop.", ) plt.plot(age_per_EpS, omega_SSfx, label="Adj. SS pop.") @@ -568,8 +548,8 @@ def plot_population_path( plt.ylabel(r"Pop. dist'n $\omega_{s}$") plt.legend(loc="lower left") # Save or return figure - if output_dir: - output_path = os.path.join(output_dir, "PopDistPath") + if path: + output_path = os.path.join(path, "PopDistPath") plt.savefig(output_path, dpi=300) plt.close() else: @@ -587,7 +567,7 @@ def gen_3Dscatters_hist(df, s, t, output_dir): rates s (int): age of individual, >= 21 t (int): year of analysis, >= 2016 - output_dir (str): output directory for saving plot files + path (str): output directory for saving plot files Returns: None @@ -722,7 +702,7 @@ def txfunc_graph( tax_func_type (str): functional form of tax functions params_to_plot (array_like or function): tax function parameters or nonparametric function - output_dir (str): output directory for saving plot files + path (str): output directory for saving plot files Returns: None @@ -837,7 +817,7 @@ def txfunc_sse_plot(age_vec, sse_mat, start_year, varstr, output_dir, round): size is BW x S start_year (int): first year of budget window varstr (str): name of tax function being evaluated - output_dir (str): path to save graph to + path (str): path to save graph to round (int): which round of sweeping for outliers (0, 1, or 2) Returns: @@ -866,7 +846,7 @@ def txfunc_sse_plot(age_vec, sse_mat, start_year, varstr, output_dir, round): def plot_income_data( - ages, abil_midp, abil_pcts, emat, t=None, output_dir=None, filesuffix="" + ages, abil_midp, abil_pcts, emat, t=None, path=None, filesuffix="" ): """ This function graphs ability matrix in 3D, 2D, log, and nolog @@ -891,15 +871,15 @@ def plot_income_data( J = abil_midp.shape[0] abil_mesh, age_mesh = np.meshgrid(abil_midp, ages) cmap1 = matplotlib.cm.get_cmap("summer") - if output_dir: + if path: # Make sure that directory is created - utils.mkdirs(output_dir) + utils.mkdirs(path) if J == 1: # Plot of 2D, J=1 in levels plt.figure() plt.plot(ages, emat[t, :, :]) filename = "ability_2D_lev" + filesuffix - fullpath = os.path.join(output_dir, filename) + fullpath = os.path.join(path, filename) plt.savefig(fullpath, dpi=300) plt.close() @@ -907,7 +887,7 @@ def plot_income_data( plt.figure() plt.plot(ages, np.log(emat[t, :, :])) filename = "ability_2D_log" + filesuffix - fullpath = os.path.join(output_dir, filename) + fullpath = os.path.join(path, filename) plt.savefig(fullpath, dpi=300) plt.close() else: @@ -925,7 +905,7 @@ def plot_income_data( ax10.set_ylabel(r"ability type -$j$") ax10.set_zlabel(r"ability $e_{j,s}$") filename = "ability_3D_lev" + filesuffix - fullpath = os.path.join(output_dir, filename) + fullpath = os.path.join(path, filename) plt.savefig(fullpath, dpi=300) plt.close() @@ -943,7 +923,7 @@ def plot_income_data( ax11.set_ylabel(r"ability type -$j$") ax11.set_zlabel(r"log ability $log(e_{j,s})$") filename = "ability_3D_log" + filesuffix - fullpath = os.path.join(output_dir, filename) + fullpath = os.path.join(path, filename) plt.savefig(fullpath, dpi=300) plt.close() @@ -991,7 +971,7 @@ def plot_income_data( ax.set_xlabel(r"age-$s$") ax.set_ylabel(r"log ability $log(e_{j,s})$") filename = "ability_2D_log" + filesuffix - fullpath = os.path.join(output_dir, filename) + fullpath = os.path.join(path, filename) plt.savefig(fullpath, dpi=300) plt.close() else: diff --git a/ogcore/txfunc.py b/ogcore/txfunc.py index c58553773..42a568816 100644 --- a/ogcore/txfunc.py +++ b/ogcore/txfunc.py @@ -8,6 +8,7 @@ income. ------------------------------------------------------------------------ """ + # Import packages import time import os @@ -1307,15 +1308,15 @@ def tax_func_loop( (NoData_cnt, 1) ) * (x1_mtry - x0_mtry) for s_ind in range(NoData_cnt): - etrparam_list[ - s - NoData_cnt - min_age + s_ind - ] = lin_int_etr[s_ind, :] - mtrxparam_list[ - s - NoData_cnt - min_age + s_ind - ] = lin_int_mtrx[s_ind, :] - mtryparam_list[ - s - NoData_cnt - min_age + s_ind - ] = lin_int_mtry[s_ind, :] + etrparam_list[s - NoData_cnt - min_age + s_ind] = ( + lin_int_etr[s_ind, :] + ) + mtrxparam_list[s - NoData_cnt - min_age + s_ind] = ( + lin_int_mtrx[s_ind, :] + ) + mtryparam_list[s - NoData_cnt - min_age + s_ind] = ( + lin_int_mtry[s_ind, :] + ) elif ( (NoData_cnt > 0) diff --git a/ogcore/utils.py b/ogcore/utils.py index 2fe130ec1..0f65d95d9 100644 --- a/ogcore/utils.py +++ b/ogcore/utils.py @@ -11,6 +11,8 @@ import pickle from pkg_resources import resource_stream, Requirement import importlib.resources +import urllib3 +import ssl EPSILON = 1e-10 # tolerance or comparison functions @@ -1039,3 +1041,31 @@ def extrapolate_arrays(param_in, dims=None, item="Parameter Name"): ) return param_out + + +class CustomHttpAdapter(requests.adapters.HTTPAdapter): + """ + The UN Data Portal server doesn't support "RFC 5746 secure renegotiation". This causes and error when the client is using OpenSSL 3, which enforces that standard by default. + The fix is to create a custom SSL context that allows for legacy connections. This defines a function get_legacy_session() that should be used instead of requests(). + """ + + # "Transport adapter" that allows us to use custom ssl_context. + def __init__(self, ssl_context=None, **kwargs): + self.ssl_context = ssl_context + super().__init__(**kwargs) + + def init_poolmanager(self, connections, maxsize, block=False): + self.poolmanager = urllib3.poolmanager.PoolManager( + num_pools=connections, + maxsize=maxsize, + block=block, + ssl_context=self.ssl_context, + ) + + +def get_legacy_session(): + ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) + ctx.options |= 0x4 # OP_LEGACY_SERVER_CONNECT #in Python 3.12 you will be able to switch from 0x4 to ssl.OP_LEGACY_SERVER_CONNECT. + session = requests.session() + session.mount("https://", CustomHttpAdapter(ctx)) + return session diff --git a/tests/test_demographics.py b/tests/test_demographics.py new file mode 100644 index 000000000..c051d4b5e --- /dev/null +++ b/tests/test_demographics.py @@ -0,0 +1,382 @@ +import numpy as np +import pytest +from ogcore import demographics + + +def test_get_pop_objs(): + """ + Test of the that omega_SS and the last period of omega_path_S are + close to each other. + """ + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + initial_data_year=start_year - 1, + final_data_year=start_year, + GraphDiag=False, + ) + + assert np.allclose(pop_dict["omega_SS"], pop_dict["omega"][-1, :]) + + +def test_pop_smooth(): + """ + Test that population distribution and pop growth rates evolve smoothly. + """ + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + fixper = int(1.5 * S) + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + initial_data_year=start_year - 1, + final_data_year=start_year, + country_id="840", + GraphDiag=False, + ) + + # assert diffs are small + # note that in the "fixper" we impost a jump in immigration rates + # to achieve the SS more quickly so the min dist is not super small + # in that period + assert np.all( + np.abs( + pop_dict["omega"][: fixper - 2, :] + - pop_dict["omega"][1 : fixper - 1, :] + ) + < 0.003 + ) + assert np.all( + np.abs( + pop_dict["omega"][fixper:-1, :] + - pop_dict["omega"][fixper + 1 :, :] + ) + < 0.0001 + ) + + +def test_pop_growth_smooth(): + """ + Test that population distribution and pop growth rates evolve smoothly. + """ + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + fixper = int(1.5 * S) + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + initial_data_year=start_year - 1, + final_data_year=start_year, + country_id="840", + GraphDiag=False, + ) + + # assert diffs are small + # note that in the "fixper" we impost a jump in immigration rates + # to achieve the SS more quickly so the min dist is not super small + # in that period + print("first few of g_n = ", pop_dict["g_n"][:5]) + assert np.all( + np.abs(pop_dict["g_n"][: fixper - 2] - pop_dict["g_n"][1 : fixper - 1]) + < 0.003 + ) + assert np.all( + np.abs(pop_dict["g_n"][fixper:-1] - pop_dict["g_n"][fixper + 1 :]) + < 0.003 + ) + + +def test_imm_smooth(): + """ + Test that immigration rates evolve smoothly. + """ + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + fixper = int(1.5 * S + 2) + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + initial_data_year=start_year - 1, + final_data_year=start_year, + GraphDiag=False, + ) + # assert diffs are small + # note that in the "fixper" we impost a jump in immigration rates + # to achieve the SS more quickly so the min dist is not super small + # in that period + print( + "Max diff before = ", + np.abs( + pop_dict["imm_rates"][: fixper - 2, :] + - pop_dict["imm_rates"][1 : fixper - 1, :] + ).max(), + ) + + print( + "Max diff after = ", + np.abs( + pop_dict["imm_rates"][fixper:-1, :] + - pop_dict["imm_rates"][fixper + 1 :, :] + ).max(), + ) + assert np.all( + np.abs( + pop_dict["imm_rates"][: fixper - 2, :] + - pop_dict["imm_rates"][1 : fixper - 1, :] + ) + < 0.03 + ) + assert np.all( + np.abs( + pop_dict["imm_rates"][fixper:-1, :] + - pop_dict["imm_rates"][fixper + 1 :, :] + ) + < 0.00001 + ) + + +def test_get_fert(): + """ + Test of function to get fertility rates from data + """ + S = 100 + fert_rates = demographics.get_fert(S, 0, 99, graph=False) + assert fert_rates.shape[1] == S + + +def test_get_mort(): + """ + Test of function to get mortality rates from data + """ + S = 100 + mort_rates, infmort_rate = demographics.get_mort(S, 0, 99, graph=False) + assert mort_rates.shape[1] == S + + +def test_infant_mort(): + """ + Test of function to get mortality rates from data + """ + mort_rates, infmort_rate = demographics.get_mort(100, 0, 99, graph=False) + # check that infant mortality equals rate hardcoded into + # demographics.py + assert infmort_rate == 0.00491958 + + +def test_pop_rebin(): + """ + Test of population rebin function + """ + curr_pop_dist = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) + totpers_new = 5 + rebinned_data = demographics.pop_rebin(curr_pop_dist, totpers_new) + assert rebinned_data.shape[0] == totpers_new + + +def test_get_imm_rates(): + """ + Test of function to solve for immigration rates from population data + """ + S = 100 + imm_rates = demographics.get_imm_rates(S, 0, 99) + assert imm_rates.shape[1] == S + + +# Test functionality when passing in a custom series of fertility rates +def test_custom_series(): + """ + Test of the get pop objects function when passing in a custom series + """ + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + imm_rates = np.ones((2, E + S)) * 0.01 + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + initial_data_year=start_year, + final_data_year=start_year + 1, + GraphDiag=False, + imm_rates=imm_rates, + ) + assert np.allclose(pop_dict["imm_rates"][0, :], imm_rates[0, E:]) + + +def test_custom_series_all(): + """ + Test of the get pop objects function when passing in custom series + for fertility, mortality, immigration, and population + """ + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + fert_rates = demographics.get_fert( + E + S, + 0, + 99, + start_year=start_year, + end_year=start_year + 1, + graph=False, + ) + mort_rates, infmort_rates = demographics.get_mort( + E + S, + 0, + 99, + start_year=start_year, + end_year=start_year + 1, + graph=False, + ) + imm_rates = demographics.get_imm_rates( + E + S, + 0, + 99, + fert_rates=fert_rates, + mort_rates=mort_rates, + infmort_rates=infmort_rates, + start_year=start_year, + end_year=start_year + 1, + graph=False, + ) + pop_dist = np.zeros((3, E + S)) + for t in range(pop_dist.shape[0]): + df = demographics.get_un_data( + "47", start_year=start_year + t, end_year=start_year + t + ) + pop = df[(df.age < 100) & (df.age >= 0)].value.values + pop_dist[t, :] = demographics.pop_rebin(pop, E + S) + df = demographics.get_un_data( + "47", start_year=start_year - 1, end_year=start_year - 1 + ) + pop = df[(df.age < 100) & (df.age >= 0)].value.values + pre_pop_dist = demographics.pop_rebin(pop, E + S) + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + fert_rates=fert_rates, + mort_rates=mort_rates, + infmort_rates=infmort_rates, + imm_rates=imm_rates, + pop_dist=pop_dist, + pre_pop_dist=pre_pop_dist, + initial_data_year=start_year, + final_data_year=start_year + 1, + GraphDiag=False, + ) + assert pop_dict is not None + + +def test_custom_series_fail(): + """ + Test of the get pop objects function when passing in custom series + for fertility, mortality, immigration, and population + + This test gives a pop dist that doesn't result from the fert, mort, + and immigration rates. This should raise an error. + """ + with pytest.raises(Exception) as e_info: + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + fert_rates = demographics.get_fert( + E + S, + 0, + 99, + start_year=start_year, + end_year=start_year + 1, + graph=False, + ) + mort_rates, infmort_rates = demographics.get_mort( + E + S, + 0, + 99, + start_year=start_year, + end_year=start_year + 1, + graph=False, + ) + imm_rates = np.ones((2, E + S)) * 0.01 + pop_dist = np.zeros((2, E + S)) + for t in range(pop_dist.shape[0]): + df = demographics.get_un_data( + "47", start_year=start_year + t, end_year=start_year + t + ) + pop = df[(df.age < 100) & (df.age >= 0)].value.values + pop_dist[t, :] = demographics.pop_rebin(pop, E + S) + df = demographics.get_un_data( + "47", start_year=start_year - 1, end_year=start_year - 1 + ) + pop = df[(df.age < 100) & (df.age >= 0)].value.values + pre_pop_dist = demographics.pop_rebin(pop, E + S) + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + fert_rates=fert_rates, + mort_rates=mort_rates, + infmort_rates=infmort_rates, + imm_rates=imm_rates, + pop_dist=pop_dist, + pre_pop_dist=pre_pop_dist, + initial_data_year=start_year, + final_data_year=start_year + 1, + GraphDiag=False, + ) + + +# Test that SS solved for +def test_SS_dist(): + """ + Test of the that omega_SS is found by period T (so in SS for last + S periods of the T+S transition path) + """ + E = 20 + S = 80 + T = int(round(4.0 * S)) + start_year = 2019 + + pop_dict = demographics.get_pop_objs( + E, + S, + T, + 0, + 99, + initial_data_year=start_year - 1, + final_data_year=start_year, + GraphDiag=False, + ) + # Assert that S reached by period T + assert np.allclose(pop_dict["omega_SS"], pop_dict["omega"][-S, :]) + assert np.allclose(pop_dict["omega_SS"], pop_dict["omega"][-1, :]) diff --git a/tests/test_parameter_plots.py b/tests/test_parameter_plots.py index 3f9defbb8..993a06d0e 100644 --- a/tests/test_parameter_plots.py +++ b/tests/test_parameter_plots.py @@ -44,13 +44,23 @@ def test_plot_imm_rates(): - fig = parameter_plots.plot_imm_rates(base_params, include_title=True) + fig = parameter_plots.plot_imm_rates( + base_params.imm_rates, + base_params.start_year, + [base_params.start_year], + include_title=True, + ) assert fig def test_plot_imm_rates_save_fig(tmpdir): - parameter_plots.plot_imm_rates(base_params, path=tmpdir) - img = mpimg.imread(os.path.join(tmpdir, "imm_rates_orig.png")) + parameter_plots.plot_imm_rates( + base_params.imm_rates, + base_params.start_year, + [base_params.start_year], + path=tmpdir, + ) + img = mpimg.imread(os.path.join(tmpdir, "imm_rates.png")) assert isinstance(img, np.ndarray) @@ -173,10 +183,8 @@ def test_plot_fert_rates(): ) age_midp = np.array([9, 10, 12, 16, 18.5, 22, 27, 32, 37, 42, 47, 55, 56]) fert_func = si.interp1d(age_midp, fert_data, kind="cubic") - fert_rates = np.random.uniform(size=totpers) - fig = parameter_plots.plot_fert_rates( - fert_func, age_midp, totpers, min_yr, max_yr, fert_data, fert_rates - ) + fert_rates = np.random.uniform(size=totpers).reshape((1, totpers)) + fig = parameter_plots.plot_fert_rates(fert_rates) assert fig @@ -206,16 +214,10 @@ def test_plot_fert_rates_save_fig(tmpdir): ) age_midp = np.array([9, 10, 12, 16, 18.5, 22, 27, 32, 37, 42, 47, 55, 56]) fert_func = si.interp1d(age_midp, fert_data, kind="cubic") - fert_rates = np.random.uniform(size=totpers) + fert_rates = np.random.uniform(size=totpers).reshape((1, totpers)) parameter_plots.plot_fert_rates( - fert_func, - age_midp, - totpers, - min_yr, - max_yr, - fert_data, fert_rates, - output_dir=tmpdir, + path=tmpdir, ) img = mpimg.imread(os.path.join(tmpdir, "fert_rates.png")) @@ -224,42 +226,20 @@ def test_plot_fert_rates_save_fig(tmpdir): def test_plot_mort_rates_data(): totpers = base_params.S - 1 - min_yr = 21 - max_yr = 100 - age_year_all = np.arange(min_yr, max_yr) - mort_rates = base_params.rho[-1, 1:].squeeze() - mort_rates_all = base_params.rho[-1, 1:].squeeze() - infmort_rate = base_params.rho[0, 0] + mort_rates = base_params.rho[-1, 1:].reshape((1, totpers)) fig = parameter_plots.plot_mort_rates_data( - totpers, - min_yr, - max_yr, - age_year_all, - mort_rates_all, - infmort_rate, mort_rates, - output_dir=None, + path=None, ) assert fig def test_plot_mort_rates_data_save_fig(tmpdir): totpers = base_params.S - 1 - min_yr = 21 - max_yr = 100 - age_year_all = np.arange(min_yr, max_yr) - mort_rates = base_params.rho[-1, 1:].squeeze() - mort_rates_all = base_params.rho[-1, 1:].squeeze() - infmort_rate = base_params.rho[0, 0] + mort_rates = base_params.rho[-1, 1:].reshape((1, totpers)) parameter_plots.plot_mort_rates_data( - totpers, - min_yr, - max_yr, - age_year_all, - mort_rates_all, - infmort_rate, mort_rates, - output_dir=tmpdir, + path=tmpdir, ) img = mpimg.imread(os.path.join(tmpdir, "mort_rates.png")) @@ -285,7 +265,7 @@ def test_plot_omega_fixed_save_fig(tmpdir): omega_SS_orig = base_params.omega_SS omega_SSfx = base_params.omega_SS parameter_plots.plot_omega_fixed( - age_per_EpS, omega_SS_orig, omega_SSfx, E, S, output_dir=tmpdir + age_per_EpS, omega_SS_orig, omega_SSfx, E, S, path=tmpdir ) img = mpimg.imread(os.path.join(tmpdir, "OrigVsFixSSpop.png")) @@ -311,7 +291,7 @@ def test_plot_imm_fixed_save_fig(tmpdir): imm_rates_orig = base_params.imm_rates[0, :] imm_rates_adj = base_params.imm_rates[-1, :] parameter_plots.plot_imm_fixed( - age_per_EpS, imm_rates_orig, imm_rates_adj, E, S, output_dir=tmpdir + age_per_EpS, imm_rates_orig, imm_rates_adj, E, S, path=tmpdir ) img = mpimg.imread(os.path.join(tmpdir, "OrigVsAdjImm.png")) @@ -322,39 +302,37 @@ def test_plot_population_path(): S = base_params.S age_per_EpS = np.arange(21, S + 21) initial_pop_pct = base_params.omega[0, :] - omega_path_lev = base_params.omega.T + omega_path_lev = base_params.omega omega_SSfx = base_params.omega_SS data_year = base_params.start_year curr_year = base_params.start_year fig = parameter_plots.plot_population_path( age_per_EpS, - initial_pop_pct, omega_path_lev, omega_SSfx, data_year, curr_year, + curr_year + 5, S, ) assert fig def test_plot_population_path_save_fig(tmpdir): - E = 0 S = base_params.S age_per_EpS = np.arange(21, S + 21) - pop_2013_pct = base_params.omega[0, :] - omega_path_lev = base_params.omega.T + omega_path_lev = base_params.omega omega_SSfx = base_params.omega_SS curr_year = base_params.start_year parameter_plots.plot_population_path( age_per_EpS, - pop_2013_pct, omega_path_lev, omega_SSfx, curr_year, - E, + curr_year + 3, + curr_year + 50, S, - output_dir=tmpdir, + path=tmpdir, ) img = mpimg.imread(os.path.join(tmpdir, "PopDistPath.png")) @@ -385,7 +363,7 @@ def test_plot_income_data_save_fig(tmpdir): abil_pcts = np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01]) emat = p.e parameter_plots.plot_income_data( - ages, abil_midp, abil_pcts, emat, output_dir=tmpdir + ages, abil_midp, abil_pcts, emat, path=tmpdir ) img1 = mpimg.imread(os.path.join(tmpdir, "ability_3D_lev.png")) img2 = mpimg.imread(os.path.join(tmpdir, "ability_3D_log.png")) diff --git a/tests/test_run_example.py b/tests/test_run_example.py index caaa073da..6a7395e63 100644 --- a/tests/test_run_example.py +++ b/tests/test_run_example.py @@ -2,6 +2,7 @@ This test tests whether starting a `run_ogcore_example.py` run of the model does not break down (is still running) after 5 minutes or 300 seconds. """ + import multiprocessing import time import os