Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding additional outputs wrapper #126

Merged
merged 3 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mbs_results/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from mbs_results.estimation.estimate import estimate
from mbs_results.imputation.impute import impute
from mbs_results.outlier_detection.detect_outlier import detect_outlier
from mbs_results.outputs.produce_outputs import produce_outputs
from mbs_results.outputs.produce_additional_outputs import produce_additional_outputs
from mbs_results.staging.stage_dataframe import stage_dataframe
from mbs_results.utilities.inputs import load_config
from mbs_results.utilities.validation_checks import (
Expand Down Expand Up @@ -32,7 +32,7 @@ def run_mbs_main():
outlier_output = detect_outlier(estimation_output, config)
validate_outlier_detection(outlier_output, config)

produce_outputs(outlier_output, "output_path/")
produce_additional_outputs(config)


if __name__ == "__main__":
Expand Down
38 changes: 26 additions & 12 deletions mbs_results/outputs/get_additional_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,37 @@ def get_additional_outputs(config: dict, function_mapper: dict) -> None:

Returns
-------
None
dict
Dictionary of additional outputs, with the keys being the names
of the outputs and the values being the outputs to be exported.

Examples
--------
>> example_function = print("Hello world)
>> config = {additional_outputs:["output_name"]}
>> function_mapper = {output_name : example_function}
>> get_additional_outputs(config,function_mapper)
>> example_function = print("Hello world")
>> config = {"additional_outputs" : ["output_name"]}
>> function_mapper = {"output_name" : example_function}
>> get_additional_outputs(config, function_mapper)
>>
>>
>> example_function = function(argA, argB)
>> config = {"additional_outputs" : ["example_output"],
>> "argA": "valueA",
>> "argB": "valueB"}
>> function_mapper = {"example_output" : example_function}
>> get_additional_outputs(config, function_mapper)

"""

additional_outputs = dict()

if not isinstance(config["additional_outputs"], list):

raise ValueError(
"""
In config file additional_outputs must be a list, please use:\n
["all"] to get all outputs\n
[] to get no outputs\n
or a list with the outputs, e.g. ["output_1","output_2"]
In config file additional_outputs must be a list, please use:\n
["all"] to get all outputs\n
[] to get no outputs\n
or a list with the outputs, e.g. ["output_1","output_2"]
"""
)

Expand All @@ -64,12 +76,14 @@ def get_additional_outputs(config: dict, function_mapper: dict) -> None:

if function in function_mapper:

function_mapper[function](**config)
additional_outputs[function] = function_mapper[function](**config)

else:
raise ValueError(
f"""
The function {function} is not registerd, check spelling.\n
Currently the registered functions are:\n {function_mapper}
The function {function} is not registered, check spelling.\n
Currently the registered functions are:\n {function_mapper}
"""
)

return additional_outputs
50 changes: 50 additions & 0 deletions mbs_results/outputs/produce_additional_outputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from importlib import metadata

from mbs_results.outputs.get_additional_outputs import get_additional_outputs
from mbs_results.outputs.selective_editing_contributer_output import (
get_selective_editing_contributer_output,
)
from mbs_results.outputs.selective_editing_question_output import (
create_selective_editing_question_output,
)
from mbs_results.outputs.turnover_analysis import create_turnover_output
from mbs_results.outputs.weighted_adj_val_time_series import (
get_weighted_adj_val_time_series,
)


def produce_additional_outputs(config: dict):
    """
    Function to write additional outputs

    Runs the additional outputs requested in the config through
    ``get_additional_outputs`` and writes every returned output to its own
    csv file, named ``<output>_v<package version>_<snapshot name>.csv``.

    Parameters
    ----------
    config : dict
        main pipeline configuration. ``mbs_file_name`` and ``output_path``
        are read from it when there is at least one output to write.

    Returns
    -------
    None.
        Outputs are written to output path defined in config

    """

    additional_outputs = get_additional_outputs(
        config,
        {
            "selective_editing_contributor": get_selective_editing_contributer_output,
            "selective_editing_question": create_selective_editing_question_output,
            "turnover_output": create_turnover_output,
            "weighted_adj_val_time_series": get_weighted_adj_val_time_series,
        },
    )

    # Stop function if no additional_outputs are listed in config. Checking
    # falsiness covers both None and an empty dictionary, so the package
    # metadata and the file-name settings are only read when something is
    # actually going to be written.
    if not additional_outputs:
        return

    file_version_mbs = metadata.metadata("monthly-business-survey-results")["version"]
    # Text before the first "." of the snapshot file name is used as its label.
    snapshot_name = config["mbs_file_name"].split(".")[0]
    output_path = config["output_path"]

    for output, dataframe in additional_outputs.items():
        filename = f"{output}_v{file_version_mbs}_{snapshot_name}.csv"
        # output_path is used as a plain prefix, so it is expected to end with
        # a path separator (e.g. "output_path/").
        dataframe.to_csv(output_path + filename)
7 changes: 0 additions & 7 deletions mbs_results/outputs/produce_outputs.py

This file was deleted.

7 changes: 6 additions & 1 deletion mbs_results/outputs/selective_editing_contributer_output.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd

from mbs_results.merge_domain import merge_domain
from mbs_results.staging.merge_domain import merge_domain


def get_selective_editing_contributer_output(
Expand All @@ -10,6 +10,7 @@ def get_selective_editing_contributer_output(
sic_input: str,
sic_mapping: str,
period_selected: int,
**config
) -> pd.DataFrame:
"""
Returns a dataframe containing period, reference, domain_group, and
Expand All @@ -29,6 +30,10 @@ def get_selective_editing_contributer_output(
Name of column in input_filepath csv file containing SIC variable.
sic_mapping : str
Name of column in domain_filepath csv file containing SIC variable.
period_selected : int
period to include in outputs
**config: Dict
main pipeline configuration. Can be used to input the entire config dictionary

Returns
-------
Expand Down
9 changes: 6 additions & 3 deletions mbs_results/outputs/selective_editing_question_output.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd

from mbs_results.merge_domain import merge_domain
from mbs_results.unsorted.selective_editing import create_standardising_factor
from mbs_results.outputs.selective_editing import create_standardising_factor
from mbs_results.staging.merge_domain import merge_domain


def create_selective_editing_question_output(
Expand All @@ -18,6 +18,7 @@ def create_selective_editing_question_output(
adjusted_value: str,
sic_domain_mapping_path: str,
period_selected: int,
**config,
) -> pd.DataFrame:
"""
creates the selective editing question output.
Expand Down Expand Up @@ -48,11 +49,13 @@ def create_selective_editing_question_output(
adjusted_value : str
name of column in dataframe containing adjusted_value variable combined
with imputed_values as outputted from Ratio of Means script
sic_domain_mapping_path : str
sic_domain_mapping_path : str
path to the sic domain mapping file
period_selected : int
previous period to take the weights for estimation of standardising factor in
the format yyyymm
**config: Dict
main pipeline configuration. Can be used to input the entire config dictionary

Returns
-------
Expand Down
3 changes: 3 additions & 0 deletions mbs_results/outputs/turnover_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def create_turnover_output(
winsorisation_df: pd.DataFrame,
winsorisation_period: str,
selected_period: int,
**config
) -> pd.DataFrame:
"""
Creating output for turnover analysis tool.
Expand All @@ -30,6 +31,8 @@ def create_turnover_output(
Name of column displaying period in winsorisation
selected_period : int
Period to output results for in the format YYYYMM
**config: Dict
main pipeline configuration. Can be used to input the entire config dictionary

Returns
-------
Expand Down
4 changes: 3 additions & 1 deletion mbs_results/outputs/weighted_adj_val_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from utilities.utils import convert_column_to_datetime


def get_weighted_adj_val_time_series(filepath: str) -> pd.DataFrame:
def get_weighted_adj_val_time_series(filepath: str, **config) -> pd.DataFrame:
"""
Time series of weighted adjusted values by classification, question number,
and cell number.
Expand All @@ -13,6 +13,8 @@ def get_weighted_adj_val_time_series(filepath: str) -> pd.DataFrame:
filepath : str
filepath to csv containing classification, question number, cell number,
period, and weighted adjusted value.
**config: Dict
main pipeline configuration. Can be used to input the entire config dictionary

Returns
-------
Expand Down
Loading