diff --git a/policyengine_taxsim/__init__.py b/policyengine_taxsim/__init__.py index 833f8ab..6f5e929 100644 --- a/policyengine_taxsim/__init__.py +++ b/policyengine_taxsim/__init__.py @@ -1,7 +1,7 @@ -from .core.input_mapper import import_single_household -from .core.output_mapper import export_single_household +from .core.input_mapper import generate_household +from .core.output_mapper import export_household from .cli import main as cli -__all__ = ["import_single_household", "export_single_household", "cli"] +__all__ = ["generate_household", "export_household", "cli"] __version__ = "0.1.0" # Make sure this matches the version in pyproject.toml diff --git a/policyengine_taxsim/cli.py b/policyengine_taxsim/cli.py index 11b40f3..2d2c271 100644 --- a/policyengine_taxsim/cli.py +++ b/policyengine_taxsim/cli.py @@ -2,11 +2,11 @@ import pandas as pd from pathlib import Path try: - from .core.input_mapper import import_single_household - from .core.output_mapper import export_single_household + from .core.input_mapper import generate_household + from .core.output_mapper import export_household except ImportError: - from policyengine_taxsim.core.input_mapper import import_single_household - from policyengine_taxsim.core.output_mapper import export_single_household + from policyengine_taxsim.core.input_mapper import generate_household + from policyengine_taxsim.core.output_mapper import export_household @click.command() @click.argument("input_file", type=click.Path(exists=True)) @@ -29,8 +29,8 @@ def main(input_file, output): results = [] for _, row in df.iterrows(): taxsim_input = row.to_dict() - pe_situation = import_single_household(taxsim_input) - taxsim_output = export_single_household(taxsim_input, pe_situation) + pe_situation = generate_household(taxsim_input) + taxsim_output = export_household(taxsim_input, pe_situation) results.append(taxsim_output) # Create output dataframe and save to csv diff --git a/policyengine_taxsim/config/variable_mappings.yaml 
def add_additional_tax_units(state, year, situation):
    """Pin the state use-tax variable to zero on the tax unit, where applicable.

    Args:
        state: Lower-case two-letter state abbreviation (the caller passes
            ``state.lower()``).
        year: Simulation year; converted with ``str`` to form the period key.
        situation: PolicyEngine situation dict containing
            ``["tax_units"]["your tax unit"]``. Mutated in place.

    Returns:
        The same ``situation`` object, for call chaining.
    """
    # States for which a "<state>_use_tax" input is written.
    # NOTE(review): presumably set to 0.0 so the simulation does not impute a
    # use tax that TAXSIM has no input for -- confirm with the model owners.
    use_tax_states = ("pa", "nc", "ca", "il", "in", "ok")

    if state not in use_tax_states:
        return situation

    tax_unit = situation["tax_units"]["your tax unit"]
    tax_unit[f"{state}_use_tax"] = {str(year): 0.0}
    return situation
def _int_or_default(value, default):
    """Return ``int(value)``, or ``default`` when the value is missing.

    "Missing" means ``None``, NaN, or any falsy value such as ``0`` or ``""``.
    """
    import math  # local import so the module-level import block stays untouched

    # A blank CSV cell arrives from pandas as NaN. NaN is truthy, so a plain
    # ``value or default`` would hand it to int(), which raises ValueError.
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return default
    return int(value or default)


def check_if_exists_or_set_defaults(taxsim_vars):
    """Fill in and int-coerce the ``state``, ``depx`` and ``mstat`` inputs.

    Args:
        taxsim_vars: Dict of TAXSIM input variables. Mutated in place.

    Returns:
        The same ``taxsim_vars`` dict with ``state``, ``depx`` and ``mstat``
        guaranteed to be present as ``int``.
    """
    # Default to state code 44 (TX, Texas) if no state field was passed,
    # or if it was passed as 0.
    taxsim_vars["state"] = _int_or_default(taxsim_vars.get("state"), 44)

    # No dependents unless stated otherwise.
    taxsim_vars["depx"] = _int_or_default(taxsim_vars.get("depx"), 0)

    # Marital status falls back to 1 when absent or 0.
    taxsim_vars["mstat"] = _int_or_default(taxsim_vars.get("mstat"), 1)

    return taxsim_vars
def get_ordinal(n):
    """Return an English ordinal label for ``n``.

    1 to 10 are spelled out ("first" ... "tenth"). Any other number is
    rendered as digits plus the correct suffix, e.g. ``11 -> "11th"``,
    ``21 -> "21st"``, ``22 -> "22nd"``, ``23 -> "23rd"``.

    Args:
        n: The position to label (the callers pass 1-based dependent indices).

    Returns:
        str: The ordinal label.
    """
    ordinals = {
        1: "first",
        2: "second",
        3: "third",
        4: "fourth",
        5: "fifth",
        6: "sixth",
        7: "seventh",
        8: "eighth",
        9: "ninth",
        10: "tenth",
    }
    if n in ordinals:
        return ordinals[n]

    try:
        last_two = abs(n) % 100
    except TypeError:
        # Non-numeric input: keep the historical "<n>th" fallback.
        return f"{n}th"

    # 11, 12 and 13 (also 111, 212, ...) take "th" despite ending in 1, 2, 3.
    if 11 <= last_two <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(last_two % 10, "th")
    return f"{n}{suffix}"
@click.command()
@click.argument("input_file", type=click.Path(exists=True))
@click.option(
    "--output",
    "-o",
    type=click.Path(),
    default="output.csv",
    help="Output file path",
)
def main(input_file, output):
    """
    Process TAXSIM input file and generate PolicyEngine-compatible output.
    """
    # NOTE: the docstring above is left as-is on purpose; click uses it as
    # the --help text.
    try:
        # Resolve the mapper functions at runtime (imports are delayed by
        # get_mappers). The local names match the functions it returns.
        generate_household, export_household = get_mappers()

        # Read input file: one TAXSIM record per CSV row.
        df = pd.read_csv(input_file)

        # Convert each row to a PolicyEngine situation, then map the
        # simulation results back to TAXSIM output variables.
        results = []
        for _, row in df.iterrows():
            taxsim_input = row.to_dict()
            pe_situation = generate_household(taxsim_input)
            taxsim_output = export_household(taxsim_input, pe_situation)
            results.append(taxsim_output)

        # Create output dataframe and save to csv, creating parent
        # directories if they do not exist yet.
        output_df = pd.DataFrame(results)
        output_path = Path(output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_df.to_csv(output_path, index=False)
        click.echo(f"Output saved to {output}")
    except Exception as e:
        # Report on stderr, then re-raise so the failure is not hidden.
        click.echo(f"Error processing input: {e}", err=True)
        raise
+ +# Get required paths +pe_data_path = Path(policyengine_us.__file__).parent / "" +yaml_path = Path(policyengine_taxsim.__file__).parent / "config" / "variable_mappings.yaml" + +analysis = Analysis( + ['exe.py'], + pathex=[], + binaries=[], + datas=[ + (str(pe_data_path), 'policyengine_us/'), + (str(yaml_path), 'policyengine_taxsim/config'), + (str(Path.cwd() / "core"), 'core'), + ], + hiddenimports=[ + 'click', + 'pandas', + 'numpy', + 'policyengine_us', + 'yaml', + 'core', + 'core.input_mapper', + 'core.output_mapper', + ], + hookspath=[], + runtime_hooks=[], + excludes=[ + '__pycache__', + '*.pyc', + '*.pyo', + '*.pyd', + '*__pycache__*', + ], + noarchive=False, +) + +pyz = PYZ(analysis.pure, analysis.zipped_data) + +exe = EXE( + pyz, + analysis.scripts, + analysis.binaries, + analysis.zipfiles, + analysis.datas, + [], + name='policyengine-taxsim', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + upx_exclude=[], + runtime_tmpdir=None, + console=True, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index f2f41a9..22a9673 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "PyYAML", "click", "matplotlib", + "numpy" ] [project.urls] @@ -39,8 +40,15 @@ policyengine-taxsim = "policyengine_taxsim.cli:main" [tool.hatch.build.targets.wheel] packages = ["policyengine_taxsim"] +include = [ + "policyengine_taxsim/**/*.py", + "policyengine_taxsim/config/*.yaml", +] + [tool.hatch.build.targets.wheel.shared-data] "resources/taxsim35" = "share/policyengine_taxsim/taxsim35" +"policyengine_taxsim/config" = "policyengine_taxsim/config" + [tool.hatch.envs.default] dependencies = [ @@ -72,22 +80,4 @@ minversion = "6.0" addopts = "-ra -q" testpaths = [ "tests", -] - -[tool.nuitka] -follow-imports = true -standalone = true -onefile = true -# Remove or set to false 
since we need console -# windows-disable-console = true -include-data-dir = ["resources/taxsim35=resources/taxsim35"] -include-package = [ - "policyengine_taxsim", - "click", - "pandas", - "numpy" -] -plugin-enable = [ - "numpy", - "pandas" ] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8be0c6c..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -policyengine-us==1.45.1 -click==8.1.3 -matplotlib==3.9.0 -pandas==2.2.2 -numpy==1.26.4 -pathlib==1.0.1 \ No newline at end of file diff --git a/resources/taxsim35/taxsim_input_joint_household.csv b/resources/taxsim35/taxsim_input_joint_household.csv new file mode 100644 index 0000000..26baeca --- /dev/null +++ b/resources/taxsim35/taxsim_input_joint_household.csv @@ -0,0 +1,3 @@ +year,state,depx,page,sage,pwages,swages,mstat,mortgage,taxsimid,idtl +2023,44,2,35,35,49000,30000,2,0,999,2 +2023,44,2,35,35,47000,30000,2,0,11,2 \ No newline at end of file diff --git a/resources/taxsim35/taxsim_input.csv b/resources/taxsim35/taxsim_input_single_household.csv similarity index 100% rename from resources/taxsim35/taxsim_input.csv rename to resources/taxsim35/taxsim_input_single_household.csv diff --git a/resources/taxsim35/taxsim_input_with_1_dep.csv b/resources/taxsim35/taxsim_input_with_1_dep.csv new file mode 100644 index 0000000..a26ff00 --- /dev/null +++ b/resources/taxsim35/taxsim_input_with_1_dep.csv @@ -0,0 +1,2 @@ +year,state,age1,depx,pwages,mstat,mortgage,taxsimid,idtl +2023,39,4,1,81000.001,2,0,6,2 diff --git a/resources/taxsim35/taxsim_input_with_1_dep_single_parent.csv b/resources/taxsim35/taxsim_input_with_1_dep_single_parent.csv new file mode 100644 index 0000000..65ef0ba --- /dev/null +++ b/resources/taxsim35/taxsim_input_with_1_dep_single_parent.csv @@ -0,0 +1,2 @@ +year,state,age1,depx,pwages,mstat,mortgage,taxsimid,idtl +2023,39,4,1,81000.001,1,0,6,2 diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 4785af3..251b5d1 100644 --- 
a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -15,6 +15,23 @@ def setUp(self) -> None: import policyengine_taxsim from importlib.metadata import distribution + self.SINGLE_HOUSEHOLD_INPUT = "taxsim_input_single_household.csv" + self.JOINT_HOUSEHOLD_INPUT = "taxsim_input_joint_household.csv" + self.HOUSEHOLD_WITH_DEPENDENT_INPUT = "taxsim_input_with_1_dep.csv" + self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_INPUT = "taxsim_input_with_1_dep_single_parent.csv" + + self.SINGLE_HOUSEHOLD_PE_TAXSIM_OUTPUT = "policyengine_taxsim_single_household_output.csv" + self.SINGLE_HOUSEHOLD_TAXSIM35_OUTPUT = "taxsim35_single_household_output.csv" + + self.JOINT_HOUSEHOLD_PE_TAXSIM_OUTPUT = "policyengine_taxsim_joint_household_output.csv" + self.JOINT_HOUSEHOLD_TAXSIM35_OUTPUT = "taxsim35_joint_household_output.csv" + + self.HOUSEHOLD_WITH_DEPENDENT_PE_TAXSIM_OUTPUT = "policyengine_taxsim_household_with_dependent_output.csv" + self.HOUSEHOLD_WITH_DEPENDENT_TAXSIM35_OUTPUT = "taxsim35_household_with_dependent_output.csv" + + self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_PE_TAXSIM_OUTPUT = "policyengine_taxsim_household_with_dependent_single_parent_output.csv" + self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_TAXSIM35_OUTPUT = "taxsim35_household_with_dependent_single_parent_output.csv" + # Get the correct path to shared data dist = distribution('policyengine-taxsim') # Print for debugging @@ -29,7 +46,7 @@ def setUp(self) -> None: # Find the first path that exists and contains our files for path in possible_paths: - if (path / 'taxsim_input.csv').exists(): + if (path / self.SINGLE_HOUSEHOLD_INPUT).exists(): self.taxsim_dir = path break else: @@ -55,31 +72,20 @@ def setUp(self) -> None: else: raise OSError(f"Unsupported operating system: {system}") - self.input_file = self.taxsim_dir / "taxsim_input.csv" + self.input_file_single_household = self.taxsim_dir / self.SINGLE_HOUSEHOLD_INPUT + self.input_file_joint_household = self.taxsim_dir / self.JOINT_HOUSEHOLD_INPUT + 
self.input_file_household_with_dependent = self.taxsim_dir / self.HOUSEHOLD_WITH_DEPENDENT_INPUT + self.input_file_household_with_dependent_single_parent = self.taxsim_dir / self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_INPUT # Verify and print paths for debugging print(f"\nDebug Information:") print(f"Taxsim Directory: {self.taxsim_dir}") - print(f"Input File Path: {self.input_file}") - print(f"Input File Exists: {self.input_file.exists()}") - if self.input_file.exists(): - print(f"Input File is Readable: {os.access(self.input_file, os.R_OK)}") - - def test_generate_policyengine_taxsim_output(self): - output_file = self.output_dir / "policyengine_taxsim_output.csv" - - # Use list form and absolute paths - cmd = [ - sys.executable, - str(self.cli_path.absolute()), - str(self.input_file.absolute()), - "-o", - str(output_file.absolute()) - ] - - # Print command for debugging - print(f"Running command: {' '.join(cmd)}") + print(f"Input File Path: {self.input_file_single_household}") + print(f"Input File Exists: {self.input_file_single_household.exists()}") + if self.input_file_single_household.exists(): + print(f"Input File is Readable: {os.access(self.input_file_single_household, os.R_OK)}") + def generate_pe_taxsim_output(self, cmd): creation_flags = 0 if platform.system().lower() == "windows": if hasattr(subprocess, 'CREATE_NO_WINDOW'): @@ -88,7 +94,6 @@ def test_generate_policyengine_taxsim_output(self): # For Python < 3.11 on Windows # DETACHED_PROCESS = 0x00000008 creation_flags = 0x00000008 - process = subprocess.run( cmd, shell=False, @@ -96,7 +101,6 @@ def test_generate_policyengine_taxsim_output(self): text=True, creationflags=creation_flags ) - print(f"PolicyEngine TAXSIM CLI output:\n{process.stdout}") if process.returncode != 0: print( @@ -106,16 +110,10 @@ def test_generate_policyengine_taxsim_output(self): f"PolicyEngine TAXSIM CLI failed: {process.returncode}" ) - self.assertTrue(output_file.is_file()) - print(f"Content of {output_file}:") - with 
open(output_file, "r") as f: - print(f.read()) - - def test_generate_taxsim35_output(self): + def generate_taxsim35_output(self, taxsim35_input_file, output_file): import tempfile import shutil - output_file = self.output_dir / "taxsim35_output.csv" taxsim_path = self.taxsim_dir / self.taxsim_exe # Create a temporary directory for execution @@ -125,7 +123,7 @@ def test_generate_taxsim35_output(self): temp_input = Path(temp_dir) / "input.csv" shutil.copy2(taxsim_path, temp_exe) - shutil.copy2(self.input_file, temp_input) + shutil.copy2(taxsim35_input_file, temp_input) if platform.system().lower() != "windows": os.chmod(temp_exe, 0o755) @@ -156,17 +154,45 @@ def test_generate_taxsim35_output(self): print(f"TAXSIM35 failed with error:\n{process.stderr}") raise Exception(f"TAXSIM35 failed: {process.returncode}") - self.assertTrue(output_file.is_file()) - print(f"Content of {output_file}:") - with open(output_file, "r") as f: - print(f.read()) + def test_generate_policyengine_taxsim_single_household_output(self): + output_file = self.output_dir / self.SINGLE_HOUSEHOLD_PE_TAXSIM_OUTPUT + + # Use list form and absolute paths + cmd = [ + sys.executable, + str(self.cli_path.absolute()), + str(self.input_file_single_household.absolute()), + "-o", + str(output_file.absolute()) + ] + + # Print command for debugging + print(f"Running command: {' '.join(cmd)}") + + self.generate_pe_taxsim_output(cmd) - def test_match_both_output(self): - taxsim35_csv = pd.read_csv(self.output_dir / "taxsim35_output.csv") + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_generate_taxsim35_single_household_output(self): + + output_file = self.output_dir / self.SINGLE_HOUSEHOLD_TAXSIM35_OUTPUT + + self.generate_taxsim35_output(self.input_file_single_household, output_file) + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + 
print(f.read()) + + def test_match_single_household_output(self): + taxsim35_csv = pd.read_csv(self.output_dir / self.SINGLE_HOUSEHOLD_TAXSIM35_OUTPUT) pe_taxsim_csv = pd.read_csv( - self.output_dir / "policyengine_taxsim_output.csv" + self.output_dir / self.SINGLE_HOUSEHOLD_PE_TAXSIM_OUTPUT ) - input_csv = pd.read_csv(self.input_file) + input_csv = pd.read_csv(self.input_file_single_household) print("Input CSV:") print(input_csv) @@ -237,7 +263,351 @@ def test_match_both_output(self): # Assert all columns match all_matched = all(comparison_results.values()) - self.assertTrue(all_matched, f"Columns with missmatches: {[col for col, matched in comparison_results.items() if not matched]}") + self.assertTrue(all_matched, + f"Columns with missmatches: {[col for col, matched in comparison_results.items() if not matched]}") + + def test_generate_policyengine_taxsim_joint_household_output(self): + output_file = self.output_dir / self.JOINT_HOUSEHOLD_PE_TAXSIM_OUTPUT + + # Use list form and absolute paths + cmd = [ + sys.executable, + str(self.cli_path.absolute()), + str(self.input_file_joint_household.absolute()), + "-o", + str(output_file.absolute()) + ] + + # Print command for debugging + print(f"Running command: {' '.join(cmd)}") + + self.generate_pe_taxsim_output(cmd) + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_generate_taxsim35_joint_household_output(self): + + output_file = self.output_dir / self.JOINT_HOUSEHOLD_TAXSIM35_OUTPUT + self.generate_taxsim35_output(self.input_file_joint_household, output_file) + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_match_joint_household_output(self): + taxsim35_csv = pd.read_csv(self.output_dir / self.JOINT_HOUSEHOLD_TAXSIM35_OUTPUT) + pe_taxsim_csv = pd.read_csv( + self.output_dir / self.JOINT_HOUSEHOLD_PE_TAXSIM_OUTPUT + ) + 
input_csv = pd.read_csv(self.input_file_joint_household) + + print("Input CSV:") + print(input_csv) + print("\nTAXSIM35 output:") + print(taxsim35_csv) + print("\nPolicyEngine TAXSIM output:") + print(pe_taxsim_csv) + + # Ensure both DataFrames have the same columns + common_columns = set(taxsim35_csv.columns) & set(pe_taxsim_csv.columns) + taxsim35_csv = taxsim35_csv[list(common_columns)] + pe_taxsim_csv = pe_taxsim_csv[list(common_columns)] + + # Ensure both DataFrames have the same column names + taxsim35_csv.columns = taxsim35_csv.columns.str.lower() + pe_taxsim_csv.columns = pe_taxsim_csv.columns.str.lower() + + # Sort both DataFrames by taxsimid to ensure rows are in the same order + taxsim35_csv = taxsim35_csv.sort_values("taxsimid").reset_index( + drop=True + ) + pe_taxsim_csv = pe_taxsim_csv.sort_values("taxsimid").reset_index( + drop=True + ) + input_csv = input_csv.sort_values("taxsimid").reset_index( + drop=True + ) + + # Convert numeric columns to float + numeric_columns = taxsim35_csv.select_dtypes( + include=["number"] + ).columns + for col in numeric_columns: + taxsim35_csv[col] = pd.to_numeric( + taxsim35_csv[col], errors="coerce" + ) + pe_taxsim_csv[col] = pd.to_numeric( + pe_taxsim_csv[col], errors="coerce" + ) + + # Compare + standard_output_cols = [ + "year", + # "fiitax", + "siitax" + ] + full_output_cols = standard_output_cols + [ + # "tfica" + # "v10", # state_agi + # "v13", + # "v18", + # "v19", + # "v26", + # "v28", + # "v34", + # "v45", + ] + + # Determine which columns to check based on idtl value + columns_to_check = full_output_cols if (input_csv["idtl"] == 2).any() else standard_output_cols + + # Compare all relevant columns at once + comparison_results = {} + for col in columns_to_check: + if col in common_columns: + matches = (taxsim35_csv[col] == pe_taxsim_csv[col]).all() + comparison_results[col] = matches + if not matches: + print(f"Mismatch in column {col}:") + print(f"TAXSIM35 values: {taxsim35_csv[col].values}") + 
print(f"PolicyEngine values: {pe_taxsim_csv[col].values}") + + # Assert all columns match + all_matched = all(comparison_results.values()) + self.assertTrue(all_matched, + f"Columns with missmatches: {[col for col, matched in comparison_results.items() if not matched]}") + + def test_generate_policyengine_taxsim_household_with_dependent_output(self): + output_file = self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_PE_TAXSIM_OUTPUT + + # Use list form and absolute paths + cmd = [ + sys.executable, + str(self.cli_path.absolute()), + str(self.input_file_household_with_dependent.absolute()), + "-o", + str(output_file.absolute()) + ] + + # Print command for debugging + print(f"Running command: {' '.join(cmd)}") + + self.generate_pe_taxsim_output(cmd) + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_generate_taxsim35_household_with_dependent_output(self): + output_file = self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_TAXSIM35_OUTPUT + self.generate_taxsim35_output(self.input_file_household_with_dependent, output_file) + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_match_household_with_dependent_output(self): + taxsim35_csv = pd.read_csv(self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_TAXSIM35_OUTPUT) + pe_taxsim_csv = pd.read_csv( + self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_PE_TAXSIM_OUTPUT + ) + input_csv = pd.read_csv(self.input_file_household_with_dependent) + + print("Input CSV:") + print(input_csv) + print("\nTAXSIM35 output:") + print(taxsim35_csv) + print("\nPolicyEngine TAXSIM output:") + print(pe_taxsim_csv) + + # Ensure both DataFrames have the same columns + common_columns = set(taxsim35_csv.columns) & set(pe_taxsim_csv.columns) + taxsim35_csv = taxsim35_csv[list(common_columns)] + pe_taxsim_csv = pe_taxsim_csv[list(common_columns)] + + # Ensure both 
DataFrames have the same column names + taxsim35_csv.columns = taxsim35_csv.columns.str.lower() + pe_taxsim_csv.columns = pe_taxsim_csv.columns.str.lower() + + # Sort both DataFrames by taxsimid to ensure rows are in the same order + taxsim35_csv = taxsim35_csv.sort_values("taxsimid").reset_index( + drop=True + ) + pe_taxsim_csv = pe_taxsim_csv.sort_values("taxsimid").reset_index( + drop=True + ) + input_csv = input_csv.sort_values("taxsimid").reset_index( + drop=True + ) + + # Convert numeric columns to float + numeric_columns = taxsim35_csv.select_dtypes( + include=["number"] + ).columns + for col in numeric_columns: + taxsim35_csv[col] = pd.to_numeric( + taxsim35_csv[col], errors="coerce" + ) + pe_taxsim_csv[col] = pd.to_numeric( + pe_taxsim_csv[col], errors="coerce" + ) + + # Compare + standard_output_cols = [ + "year", + # "fiitax", + "siitax" + ] + full_output_cols = standard_output_cols + [ + # "tfica" + # "v10", # state_agi + # "v13", + # "v18", + # "v19", + # "v26", + # "v28", + # "v34", + # "v45", + ] + + # Determine which columns to check based on idtl value + columns_to_check = full_output_cols if (input_csv["idtl"] == 2).any() else standard_output_cols + + # Compare all relevant columns at once + comparison_results = {} + for col in columns_to_check: + if col in common_columns: + matches = (taxsim35_csv[col] == pe_taxsim_csv[col]).all() + comparison_results[col] = matches + if not matches: + print(f"Mismatch in column {col}:") + print(f"TAXSIM35 values: {taxsim35_csv[col].values}") + print(f"PolicyEngine values: {pe_taxsim_csv[col].values}") + + # Assert all columns match + all_matched = all(comparison_results.values()) + self.assertTrue(all_matched, + f"Columns with missmatches: {[col for col, matched in comparison_results.items() if not matched]}") + + def test_generate_policyengine_taxsim_household_with_dependent_single_parent_output(self): + output_file = self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_PE_TAXSIM_OUTPUT + + # Use list 
form and absolute paths + cmd = [ + sys.executable, + str(self.cli_path.absolute()), + str(self.input_file_household_with_dependent_single_parent.absolute()), + "-o", + str(output_file.absolute()) + ] + + # Print command for debugging + print(f"Running command: {' '.join(cmd)}") + + self.generate_pe_taxsim_output(cmd) + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_generate_taxsim35_household_with_dependent_single_parent_output(self): + output_file = self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_TAXSIM35_OUTPUT + self.generate_taxsim35_output(self.input_file_household_with_dependent_single_parent, output_file) + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_match_household_with_dependent_single_parent_output(self): + taxsim35_csv = pd.read_csv(self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_TAXSIM35_OUTPUT) + pe_taxsim_csv = pd.read_csv( + self.output_dir / self.HOUSEHOLD_WITH_DEPENDENT_SINGLE_PARENT_PE_TAXSIM_OUTPUT + ) + input_csv = pd.read_csv(self.input_file_household_with_dependent_single_parent) + + print("Input CSV:") + print(input_csv) + print("\nTAXSIM35 output:") + print(taxsim35_csv) + print("\nPolicyEngine TAXSIM output:") + print(pe_taxsim_csv) + + # Ensure both DataFrames have the same columns + common_columns = set(taxsim35_csv.columns) & set(pe_taxsim_csv.columns) + taxsim35_csv = taxsim35_csv[list(common_columns)] + pe_taxsim_csv = pe_taxsim_csv[list(common_columns)] + + # Ensure both DataFrames have the same column names + taxsim35_csv.columns = taxsim35_csv.columns.str.lower() + pe_taxsim_csv.columns = pe_taxsim_csv.columns.str.lower() + + # Sort both DataFrames by taxsimid to ensure rows are in the same order + taxsim35_csv = taxsim35_csv.sort_values("taxsimid").reset_index( + drop=True + ) + pe_taxsim_csv = 
pe_taxsim_csv.sort_values("taxsimid").reset_index( + drop=True + ) + input_csv = input_csv.sort_values("taxsimid").reset_index( + drop=True + ) + + # Convert numeric columns to float + numeric_columns = taxsim35_csv.select_dtypes( + include=["number"] + ).columns + for col in numeric_columns: + taxsim35_csv[col] = pd.to_numeric( + taxsim35_csv[col], errors="coerce" + ) + pe_taxsim_csv[col] = pd.to_numeric( + pe_taxsim_csv[col], errors="coerce" + ) + + # Compare + standard_output_cols = [ + "year", + # "fiitax", + "siitax" + ] + full_output_cols = standard_output_cols + [ + # "tfica" + # "v10", # state_agi + # "v13", + # "v18", + # "v19", + # "v26", + # "v28", + # "v34", + # "v45", + ] + + # Determine which columns to check based on idtl value + columns_to_check = full_output_cols if (input_csv["idtl"] == 2).any() else standard_output_cols + + # Compare all relevant columns at once + comparison_results = {} + for col in columns_to_check: + if col in common_columns: + matches = (taxsim35_csv[col] == pe_taxsim_csv[col]).all() + comparison_results[col] = matches + if not matches: + print(f"Mismatch in column {col}:") + print(f"TAXSIM35 values: {taxsim35_csv[col].values}") + print(f"PolicyEngine values: {pe_taxsim_csv[col].values}") + + # Assert all columns match + all_matched = all(comparison_results.values()) + self.assertTrue(all_matched, + f"Columns with mismatches: {[col for col, matched in comparison_results.items() if not matched]}") + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_mappers.py b/tests/test_mappers.py index 8164a11..e94b5de 100644 --- a/tests/test_mappers.py +++ b/tests/test_mappers.py @@ -1,6 +1,6 @@ import pytest -from policyengine_taxsim import import_single_household, export_single_household +from policyengine_taxsim import generate_household, export_household @pytest.fixture @@ -15,108 +15,384 @@ def sample_taxsim_input(): } -sample_taxsim_input_without_state = { - "year": 
2021, - "page": 35, - "pwages": 50000, - "taxsimid": 11, - "idtl": 0 -} +@pytest.fixture +def sample_taxsim_input_without_state(): + return { + "year": 2021, + "page": 35, + "pwages": 50000, + "taxsimid": 11, + "idtl": 0 + } + + +@pytest.fixture +def sample_taxsim_input_with_state_eq_0(): + return { + "year": 2021, + "page": 35, + "state": 0, + "pwages": 50000, + "taxsimid": 11, + "idtl": 0 + } + + +@pytest.fixture +def sample_taxsim_input_for_joint(): + return { + "year": 2023, + "page": 40, + "sage": 40, + "state": 0, + "mstat": 2, + "pwages": 45000, + "swages": 30000, + "taxsimid": 11, + "idtl": 2, + "depx": 2 + } + + +@pytest.fixture +def sample_taxsim_input_for_household_with_dependent(): + return { + "year": 2023, + "state": 39, + "pwages": 81000.001, + "swages": 0, + "taxsimid": 11, + "idtl": 2, + "mstat": 2, + "depx": 2, + "age1": 4 + } + -sample_taxsim_input_with_state_eq_0 = { - "year": 2021, - "page": 35, - "state": 0, - "pwages": 50000, - "taxsimid": 11, - "idtl": 0 -} +@pytest.fixture +def sample_taxsim_input_for_household_with_dependent_single_parent(): + return { + "year": 2023, + "state": 39, + "pwages": 81000.001, + "swages": 0, + "taxsimid": 11, + "idtl": 2, + "mstat": 1, + "depx": 2, + "age1": 4 + } def test_import_single_household(sample_taxsim_input): expected_output = { - "people": { - "you": {"age": {"2021": 35}, "employment_income": {"2021": 50000}} + "families": { + "your family": { + "members": [ + "you" + ] + } }, "households": { "your household": { - "members": ["you"], - "state_name": {"2021": "AZ"}, + "members": [ + "you" + ], + "state_name": { + "2021": "AZ" + } + } + }, + "marital_units": { + "your marital unit": { + "members": [ + "you" + ] } }, - "tax_units": {"your tax unit": {"members": ["you"]}}, + "people": { + "you": { + "age": { + "2021": 35 + }, + "employment_income": { + "2021": 50000 + } + } + }, + "spm_units": { + "your household": { + "members": [ + "you" + ] + } + }, + "tax_units": { + "your tax unit": { + "members": [ 
+ "you" + ] + } + } } - result = import_single_household(sample_taxsim_input) + result = generate_household(sample_taxsim_input) assert result == expected_output -def test_import_single_household_without_state(): +def test_import_single_household_without_state(sample_taxsim_input_without_state): expected_output = { - "people": { - "you": {"age": {"2021": 35}, "employment_income": {"2021": 50000}} + "families": { + "your family": { + "members": [ + "you" + ] + } }, "households": { "your household": { - "members": ["you"], - "state_name": {"2021": "TX"}, + "members": [ + "you" + ], + "state_name": { + "2021": "TX" + } + } + }, + "marital_units": { + "your marital unit": { + "members": [ + "you" + ] + } + }, + "people": { + "you": { + "age": { + "2021": 35 + }, + "employment_income": { + "2021": 50000 + } + } + }, + "spm_units": { + "your household": { + "members": [ + "you" + ] } }, - "tax_units": {"your tax unit": {"members": ["you"]}}, + "tax_units": { + "your tax unit": { + "members": [ + "you" + ] + } + } } - result = import_single_household(sample_taxsim_input_without_state) + result = generate_household(sample_taxsim_input_without_state) assert result == expected_output -def test_import_single_household_with_state_eq_0(): +def test_import_single_household_with_state_eq_0(sample_taxsim_input_with_state_eq_0): expected_output = { - "people": { - "you": {"age": {"2021": 35}, "employment_income": {"2021": 50000}} + "families": { + "your family": { + "members": [ + "you" + ] + } }, "households": { "your household": { - "members": ["you"], - "state_name": {"2021": "TX"}, + "members": [ + "you" + ], + "state_name": { + "2021": "TX" + } + } + }, + "marital_units": { + "your marital unit": { + "members": [ + "you" + ] + } + }, + "people": { + "you": { + "age": { + "2021": 35 + }, + "employment_income": { + "2021": 50000 + } } }, - "tax_units": {"your tax unit": {"members": ["you"]}}, + "spm_units": { + "your household": { + "members": [ + "you" + ] + } + }, + 
"tax_units": { + "your tax unit": { + "members": [ + "you" + ] + } + } } - result = import_single_household(sample_taxsim_input_with_state_eq_0) + result = generate_household(sample_taxsim_input_with_state_eq_0) assert result == expected_output def test_export_single_household(sample_taxsim_input): - policyengine_situation = { - "people": { - "you": {"age": {"2021": 35}, "employment_income": {"2021": 50000}} + policyengine_single_household_situation = { + "families": { + "your family": { + "members": [ + "you" + ] + } }, "households": { "your household": { - "members": ["you"], - "state_name": {"2021": "AZ"}, + "members": [ + "you" + ], + "state_name": { + "2021": "AZ" + } + } + }, + "marital_units": { + "your marital unit": { + "members": [ + "you" + ] + } + }, + "people": { + "you": { + "age": { + "2021": 35 + }, + "employment_income": { + "2021": 50000 + } } }, - "tax_units": {"your tax unit": {"members": ["you"]}}, + "spm_units": { + "your household": { + "members": [ + "you" + ] + } + }, + "tax_units": { + "your tax unit": { + "members": [ + "you" + ] + } + } } - result = export_single_household(sample_taxsim_input, policyengine_situation) - print(result) + result = export_household(sample_taxsim_input, policyengine_single_household_situation) assert result["year"] == 2021 assert result["state"] == 3 assert "fiitax" in result assert "siitax" in result - # Note: We can't easily predict the exact tax values without mocking the PolicyEngine simulation, - # so we're just checking that these keys exist in the output. 
+ + +def test_joint_household(sample_taxsim_input_for_joint): + expected_output_joint_situation = {'families': { + 'your family': {'members': ['you', 'your partner', 'your first dependent', 'your second dependent']}}, + 'households': {'your household': { + 'members': ['you', 'your partner', 'your first dependent', + 'your second dependent'], 'state_name': {'2023': 'TX'}}}, + 'marital_units': {'your marital unit': {'members': ['you', 'your partner']}, + "your first dependent's marital unit": { + 'members': ['your first dependent'], + 'marital_unit_id': {'2023': 1}}, + "your second dependent's marital unit": { + 'members': ['your second dependent'], + 'marital_unit_id': {'2023': 2}}}, + 'people': {'you': {'age': {'2023': 40}, 'employment_income': {'2023': 45000.0}}, + 'your partner': {'age': {'2023': 40}, + 'employment_income': {'2023': 30000.0}}, + 'your first dependent': {'age': {'2023': 10}, + 'employment_income': {'2023': 0}}, + 'your second dependent': {'age': {'2023': 10}, + 'employment_income': {'2023': 0}}}, + 'spm_units': {'your household': { + 'members': ['you', 'your partner', 'your first dependent', + 'your second dependent']}}, 'tax_units': { + 'your tax unit': {'members': ['you', 'your partner', 'your first dependent', 'your second dependent']}}} + + result = generate_household(sample_taxsim_input_for_joint) + assert result == expected_output_joint_situation + + +def test_household_with_dependent(sample_taxsim_input_for_household_with_dependent): + expected_output = {'families': { + 'your family': {'members': ['you', 'your partner', 'your first dependent', 'your second dependent']}}, + 'households': {'your household': { + 'members': ['you', 'your partner', 'your first dependent', 'your second dependent'], + 'state_name': {'2023': 'PA'}}}, + 'marital_units': {'your marital unit': {'members': ['you', 'your partner']}, + "your first dependent's marital unit": {'members': ['your first dependent'], + 'marital_unit_id': {'2023': 1}}, + "your second 
dependent's marital unit": {'members': ['your second dependent'], + 'marital_unit_id': {'2023': 2}}}, + 'people': {'you': {'age': {'2023': 40}, 'employment_income': {'2023': 81000.001}}, + 'your partner': {'age': {'2023': 40}, + 'employment_income': {'2023': 0.0}}, + 'your first dependent': {'age': {'2023': 4}, 'employment_income': {'2023': 0}}, + 'your second dependent': {'age': {'2023': 10}, 'employment_income': {'2023': 0}}}, + 'spm_units': {'your household': { + 'members': ['you', 'your partner', 'your first dependent', 'your second dependent']}}, + 'tax_units': {'your tax unit': { + 'members': ['you', 'your partner', 'your first dependent', 'your second dependent'], + 'pa_use_tax': {'2023': 0.0}}}} + + result = generate_household(sample_taxsim_input_for_household_with_dependent) + assert result == expected_output + + +def test_household_with_dependent_single_parent(sample_taxsim_input_for_household_with_dependent_single_parent): + expected_output = { + 'families': {'your family': {'members': ['you', 'your first dependent', 'your second dependent']}}, + 'households': {'your household': {'members': ['you', 'your first dependent', 'your second dependent'], + 'state_name': {'2023': 'PA'}}}, + 'marital_units': {'your marital unit': {'members': ['you']}, + "your first dependent's marital unit": {'members': ['your first dependent'], + 'marital_unit_id': {'2023': 1}}, + "your second dependent's marital unit": {'members': ['your second dependent'], + 'marital_unit_id': {'2023': 2}}}, + 'people': {'you': {'age': {'2023': 40}, 'employment_income': {'2023': 81000.001}}, + 'your first dependent': {'age': {'2023': 4}, 'employment_income': {'2023': 0}}, + 'your second dependent': {'age': {'2023': 10}, 'employment_income': {'2023': 0}}}, + 'spm_units': {'your household': {'members': ['you', 'your first dependent', 'your second dependent']}}, + 'tax_units': {'your tax unit': {'members': ['you', 'your first dependent', 'your second dependent'], + 'pa_use_tax': {'2023': 0.0}}}} 
+ + result = generate_household(sample_taxsim_input_for_household_with_dependent_single_parent) + assert result == expected_output def test_roundtrip(sample_taxsim_input): # Import TAXSIM input to PolicyEngine situation - pe_situation = import_single_household(sample_taxsim_input) + pe_situation = generate_household(sample_taxsim_input) # Export PolicyEngine situation back to TAXSIM output - taxsim_output = export_single_household(sample_taxsim_input, pe_situation) + taxsim_output = export_household(sample_taxsim_input, pe_situation) # Check that key information is preserved assert taxsim_output["year"] == sample_taxsim_input["year"]