Skip to content

Commit

Permalink
Merge pull request PSLmodels#93 from jdebacker/s3
Browse files: browse the repository at this point in the history
Update S3 bucket info
  • Loading branch information
jdebacker authored Mar 1, 2024
2 parents 8c54c24 + c916947 commit a10faf6
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 49 deletions.
7 changes: 6 additions & 1 deletion cs-config/cs_config/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@

AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")
PUF_S3_FILE_LOCATION = os.environ.get(
"PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
)
CUR_DIR = os.path.dirname(os.path.realpath(__file__))

# Get Tax-Calculator default parameters
Expand Down Expand Up @@ -172,7 +175,9 @@ def run_model(meta_param_dict, adjustment):
meta_params = MetaParams()
meta_params.adjust(meta_param_dict)
if meta_params.data_source == "PUF":
data = retrieve_puf(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
data = retrieve_puf(
PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
)
# set name of cached baseline file in case use below
cached_pickle = "TxFuncEst_baseline_PUF.pkl"
else:
Expand Down
50 changes: 40 additions & 10 deletions cs-config/cs_config/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,23 @@
except ImportError:
boto3 = None
import gzip
import os
import pandas as pd
from taxcalc import Policy
from collections import defaultdict
from pathlib import Path
import warnings

try:
from s3fs import S3FileSystem
except ImportError as ie:
S3FileSystem = None

AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", None)
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", None)
PUF_S3_FILE_LOCATION = os.environ.get(
"PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
)
TC_LAST_YEAR = Policy.LAST_BUDGET_YEAR

POLICY_SCHEMA = {
Expand Down Expand Up @@ -73,20 +86,37 @@
}


def retrieve_puf(
    puf_s3_file_location=PUF_S3_FILE_LOCATION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
):
    """
    Function for retrieving the PUF, either from an S3 bucket or from a
    local file in the current working directory.

    The sources are tried in this order:
        1. ``puf_s3_file_location`` on S3 (requires both credentials and
           an importable ``s3fs``)
        2. ``puf.csv.gz`` in the current working directory
        3. ``puf.csv`` in the current working directory

    Args:
        puf_s3_file_location (str): S3 URI of the gzip-compressed PUF
            CSV file
        aws_access_key_id (str): AWS access key id; ``None`` or an
            empty string means "no credentials"
        aws_secret_access_key (str): AWS secret access key; ``None`` or
            an empty string means "no credentials"

    Returns:
        puf_df (Pandas DataFrame or None): the PUF data, or ``None``
            (after emitting a warning) if no source is available

    """
    s3_reader_installed = S3FileSystem is not None
    # Use truthiness rather than ``is not None``: callers may pass ""
    # when the environment variables are unset, and empty credentials
    # must not be treated as usable ones.
    has_credentials = bool(aws_access_key_id and aws_secret_access_key)
    if puf_s3_file_location and has_credentials and s3_reader_installed:
        print("Reading puf from S3 bucket.", puf_s3_file_location)
        # Use the arguments passed by the caller, not the module-level
        # defaults, so that overrides actually take effect.
        fs = S3FileSystem(
            key=aws_access_key_id,
            secret=aws_secret_access_key,
        )
        with fs.open(puf_s3_file_location) as f:
            # The S3 object is gzip-compressed; pandas decompresses the
            # stream while parsing.
            puf_df = pd.read_csv(f, compression="gzip")
        return puf_df
    elif Path("puf.csv.gz").exists():
        print("Reading puf from puf.csv.gz.")
        return pd.read_csv("puf.csv.gz", compression="gzip")
    elif Path("puf.csv").exists():
        print("Reading puf from puf.csv.")
        return pd.read_csv("puf.csv")
    else:
        warnings.warn(
            f"PUF file not available (has_credentials={has_credentials}, "
            f"s3_reader_installed={s3_reader_installed})"
        )
        return None
12 changes: 3 additions & 9 deletions cs-config/install.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# bash commands for installing your package

git clone -b master --depth 1 https://github.com/PSLmodels/OG-USA
cd OG-USA

# Explicitly add channels for looking up dependencies outside of
# taxcalc and paramtools. If the channels are not specified like this,
# the tests fail due to not being able to converge on a solution.
conda config --add channels PSLmodels
conda config --add channels conda-forge
conda install scipy mkl dask matplotlib PSLmodels::taxcalc conda-forge::paramtools
# install packages needed by CS, but not in ogusa-dev env
pip install s3fs
# install OG-USA from source
pip install -e .
70 changes: 41 additions & 29 deletions ogusa/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(
estimate_tax_functions=False,
estimate_beta=False,
estimate_chi_n=False,
estimate_pop=False,
tax_func_path=None,
iit_reform={},
guid="",
Expand All @@ -27,6 +28,7 @@ def __init__(
self.estimate_tax_functions = estimate_tax_functions
self.estimate_beta = estimate_beta
self.estimate_chi_n = estimate_chi_n
self.estimate_pop = estimate_pop
if estimate_tax_functions:
if tax_func_path is not None:
run_micro = False
Expand All @@ -42,7 +44,7 @@ def __init__(
run_micro=run_micro,
tax_func_path=tax_func_path,
)
if estimate_beta:
if self.estimate_beta:
self.beta_j = estimate_beta_j.beta_estimate(self)
# if estimate_chi_n:
# chi_n = self.get_chi_n()
Expand All @@ -57,35 +59,44 @@ def __init__(
self.zeta = bequest_transmission.get_bequest_matrix(p.J, p.lambdas)

# demographics
self.demographic_params = demographics.get_pop_objs(
p.E,
p.S,
p.T,
0,
99,
initial_data_year=p.start_year - 1,
final_data_year=p.start_year,
)
if estimate_pop:
self.demographic_params = demographics.get_pop_objs(
p.E,
p.S,
p.T,
0,
99,
initial_data_year=p.start_year - 1,
final_data_year=p.start_year,
)

# demographics for 80 period lives (needed for getting e below)
demog80 = demographics.get_pop_objs(
20,
80,
p.T,
0,
99,
initial_data_year=p.start_year - 1,
final_data_year=p.start_year,
)
# demographics for 80 period lives (needed for getting e below)
demog80 = demographics.get_pop_objs(
20,
80,
p.T,
0,
99,
initial_data_year=p.start_year - 1,
final_data_year=p.start_year,
)

# earnings profiles
self.e = income.get_e_interp(
p.S,
self.demographic_params["omega_SS"],
demog80["omega_SS"],
p.lambdas,
plot=False,
)
# earnings profiles
self.e = income.get_e_interp(
p.S,
self.demographic_params["omega_SS"],
demog80["omega_SS"],
p.lambdas,
plot=False,
)
else:
self.e = income.get_e_interp(
p.S,
p.omega_SS,
p.omega_SS,
p.lambdas,
plot=False,
)

# Tax Functions
def get_tax_function_parameters(
Expand Down Expand Up @@ -334,6 +345,7 @@ def get_dict(self):
dict["zeta"] = self.zeta
dict.update(self.macro_params)
dict["e"] = self.e
dict.update(self.demographic_params)
if self.estimate_pop:
dict.update(self.demographic_params)

return dict

0 comments on commit a10faf6

Please sign in to comment.