Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

data_refresh for US BEA #894

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion scripts/us_bea/states_gdp/import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,14 @@ class StateGDPDataLoader:
'West Virginia', 'Wisconsin', 'Wyoming'
]
_ZIP_LINK = 'https://apps.bea.gov/regional/zip/SQGDP.zip'
_STATE_QUARTERLY_GDP_FILE = 'SQGDP1__ALL_AREAS_2005_2020.csv'
_STATE_QUARTERLY_GDP_FILE = 'SQGDP1__ALL_AREAS_2005_2023.csv'
_QUARTER_MONTH_MAP = {'Q1': '03', 'Q2': '06', 'Q3': '09', 'Q4': '12'}

def __init__(self):
"""Initializes instance, assigning member data frames to None."""
self.raw_df = None
self.clean_df = None
self.clean_df1 = None

def download_data(self, zip_link=None, file=None):
"""Downloads ZIP file, extracts the desired CSV, and puts it into a data
Expand Down
19 changes: 11 additions & 8 deletions scripts/us_bea/states_gdp/import_industry_data_and_gen_mcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class StateGDPIndustryDataLoader(import_data.StateGDPDataLoader):
Attributes:
df: DataFrame (DF) with the cleaned data.
"""
_STATE_QUARTERLY_INDUSTRY_GDP_FILE = 'SQGDP2__ALL_AREAS_2005_2020.csv'
_STATE_QUARTERLY_INDUSTRY_GDP_FILE = 'SQGDP2__ALL_AREAS_2005_2023.csv'

def download_data(self, zip_link=None, file=None):
"""Downloads ZIP file, extracts the desired CSV, and puts it into a data
Expand Down Expand Up @@ -91,14 +91,16 @@ def process_data(self, raw_data=None):
df['GeoId'] = df['GeoFIPS'].apply(self.convert_geoid)

df = df[df['IndustryClassification'] != '...']

df['NAICS'] = df['IndustryClassification'].apply(
self.convert_industry_class)
df['value'] = df['value'].apply(self.value_converter)
df = df[df['value'] >= 0]

# Convert from millions of current USD to current USD.
df['value'] *= 1000000
# Creating a DataFrame for the MCF generation.
self.clean_df1 = df.copy()
df['NAICS'] = df["NAICS"].str.replace('-', '_')
self.clean_df = df.drop(['GeoFIPS', 'IndustryClassification'], axis=1)

@staticmethod
Expand All @@ -120,7 +122,7 @@ def convert_industry_class(naics_code):
Commons codes.
"""
if isinstance(naics_code, str):
naics_code = naics_code.replace('-', '_').replace(',', '&')
naics_code = naics_code.replace(',', '_')
return f"dcs:USStateQuarterlyIndustryGDP_NAICS_{naics_code}"

def save_csv(self, filename='states_industry_gdp.csv'):
Expand All @@ -141,14 +143,15 @@ def generate_mcf(self):
'activitySource: dcs:GrossDomesticProduction\n'
'measuredProperty: dcs:amount\n'
'measurementQualifier: dcs:Nominal\n'
'statType: dcs:measuredValue\n'
'naics: dcid:NAICS/{naics}\n\n')

with open('states_gdp_industry_statvars.mcf', 'w') as mcf_f:
for naics_code in self.clean_df['NAICS'].unique():
code_title = naics_code[38:]
code = code_title.replace('_', '-')
code = code.replace('&', '&NAICS/')
mcf_f.write(mcf_temp.format(title=code_title, naics=code))
for naics_code in self.clean_df1['NAICS'].unique():
code_title = naics_code[38:].replace('-', '_')
naics_title = naics_code[38:]
mcf_f.write(mcf_temp.format(title=code_title,
naics=naics_title))


def main(_):
Expand Down
6,836 changes: 3,724 additions & 3,112 deletions scripts/us_bea/states_gdp/states_gdp.csv

Large diffs are not rendered by default.

29 changes: 25 additions & 4 deletions scripts/us_bea/states_gdp/states_gdp_industry_statvars.mcf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/11

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_21
Expand All @@ -12,6 +13,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/21

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_22
Expand All @@ -20,6 +22,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/22

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_23
Expand All @@ -28,6 +31,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/23

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_31_33
Expand All @@ -36,30 +40,34 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/31-33

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_321&327_339
Node: dcid:USStateQuarterlyIndustryGDP_NAICS_321_327_339
typeOf: dcs:StatisticalVariable
populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
naics: dcid:NAICS/321&NAICS/327-339
statType: dcs:measuredValue
naics: dcid:NAICS/321_327-339

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_311_316&322_326
Node: dcid:USStateQuarterlyIndustryGDP_NAICS_311_316_322_326
typeOf: dcs:StatisticalVariable
populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
naics: dcid:NAICS/311-316&NAICS/322-326
statType: dcs:measuredValue
naics: dcid:NAICS/311-316_322-326

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_42
typeOf: dcs:StatisticalVariable
populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/42

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_44_45
Expand All @@ -68,6 +76,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/44-45

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_48_49
Expand All @@ -76,6 +85,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/48-49

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_51
Expand All @@ -84,6 +94,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/51

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_52
Expand All @@ -92,6 +103,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/52

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_53
Expand All @@ -100,6 +112,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/53

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_54
Expand All @@ -108,6 +121,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/54

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_55
Expand All @@ -116,6 +130,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/55

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_56
Expand All @@ -124,6 +139,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/56

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_61
Expand All @@ -132,6 +148,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/61

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_62
Expand All @@ -140,6 +157,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/62

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_71
Expand All @@ -148,6 +166,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/71

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_72
Expand All @@ -156,6 +175,7 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/72

Node: dcid:USStateQuarterlyIndustryGDP_NAICS_81
Expand All @@ -164,5 +184,6 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:Nominal
statType: dcs:measuredValue
naics: dcid:NAICS/81

1 change: 1 addition & 0 deletions scripts/us_bea/states_gdp/states_gdp_statvars.mcf
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ populationType: dcs:EconomicActivity
activitySource: dcs:GrossDomesticProduction
measuredProperty: dcs:amount
measurementQualifier: dcs:InflationAdjusted
statType: dcs:measuredValue
Loading
Loading