diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..331dae1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: Run Tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.10", 3.11, 3.12] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + pip install pytest + + - name: Make TAXSIM executables executable (Unix) + if: runner.os != 'Windows' + run: chmod +x resources/taxsim35/taxsim35-*.exe + + - name: Run tests + run: pytest tests/ diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml deleted file mode 100644 index e56abb6..0000000 --- a/.github/workflows/python-package.yml +++ /dev/null @@ -1,40 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: Python package - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.9", "3.10", "3.11"] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1c4e735 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2024 PolicyEngine + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/output/policyengine_taxsim_output.csv b/output/policyengine_taxsim_output.csv new file mode 100644 index 0000000..a3bf96d --- /dev/null +++ b/output/policyengine_taxsim_output.csv @@ -0,0 +1,3 @@ +taxsimid,year,state,mstat,page,sage,fiitax,siitax,fica +1,2021,3,1,40,0,[2775.],[1008.87],[3748.5] +1,2021,3,1,40,0,[2535.],[942.07],[3595.5] diff --git a/output/taxsim35_output.csv b/output/taxsim35_output.csv new file mode 100644 index 0000000..ca7a377 --- /dev/null +++ b/output/taxsim35_output.csv @@ -0,0 +1,3 @@ +taxsimid,year,state,fiitax,siitax,fica,frate,srate,ficar,tfica,credits,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20, v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,v31,v32,v33,v34, v35,v36,v37,v38,v39,v40,v41,staxbc,v42,v43,v44,v45 +1.,2021,3,2775.00,1008.87,7497.00,12.00,3.34,15.30,3748.50,.00,49000.00,.00,.00,12550.00,.00,.00,.00,.00,36450.00,4175.00,.00,.00,.00,.00,.00,.00,49000.00,.00,4175.00,7497.00,49000.01,.00,49000.01,.00,12550.00,1008.87,36450.01,.00,.00,.00,.00,3.34,.00,.00,.00,.00,1400.00 +1.,2021,3,2535.00,942.07,7191.00,12.00,3.34,15.30,3595.50,.00,47000.00,.00,.00,12550.00,.00,.00,.00,.00,34450.00,3935.00,.00,.00,.00,.00,.00,.00,47000.00,.00,3935.00,7191.00,47001.01,.00,47000.01,.00,12550.00,942.07,34450.01,.00,.00,.00,.00,3.34,.00,.00,.00,.00,1400.00 diff --git a/policyengine_taxsim/__init__.py b/policyengine_taxsim/__init__.py new file mode 100644 index 0000000..833f8ab --- /dev/null +++ b/policyengine_taxsim/__init__.py @@ -0,0 +1,7 @@ +from .core.input_mapper import import_single_household +from .core.output_mapper import export_single_household +from .cli import main as cli + +__all__ = ["import_single_household", "export_single_household", "cli"] + +__version__ = "0.1.0" # Make sure this matches the version in pyproject.toml diff --git a/policyengine_taxsim/cli.py b/policyengine_taxsim/cli.py new file mode 100644 index 0000000..085ab3f --- /dev/null +++ b/policyengine_taxsim/cli.py @@ -0,0 +1,45 @@ +import click +import pandas as pd +from pathlib import Path +from policyengine_taxsim.core.input_mapper import import_single_household +from policyengine_taxsim.core.output_mapper import export_single_household + + +@click.command() +@click.argument("input_file", type=click.Path(exists=True)) +@click.option( + "--output", + "-o", + type=click.Path(), + default="output.csv", + help="Output file path", +) +def main(input_file, output): + """ + Process TAXSIM input file and generate PolicyEngine-compatible output. + """ + try: + # Read input file + df = pd.read_csv(input_file) + + # Process each row + results = [] + for _, row in df.iterrows(): + taxsim_input = row.to_dict() + pe_situation = import_single_household(taxsim_input) + taxsim_output = export_single_household(pe_situation) + results.append(taxsim_output) + + # Create output dataframe and save to csv + output_df = pd.DataFrame(results) + output_path = Path(output) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_df.to_csv(output_path, index=False) + click.echo(f"Output saved to {output}") + except Exception as e: + click.echo(f"Error processing input: {str(e)}", err=True) + raise + + +if __name__ == "__main__": + main() diff --git a/policyengine_taxsim/config/variable_mappings.yaml b/policyengine_taxsim/config/variable_mappings.yaml new file mode 100644 index 0000000..05dbdda --- /dev/null +++ b/policyengine_taxsim/config/variable_mappings.yaml @@ -0,0 +1,51 @@ +taxsim_to_policyengine: + year: get_year + state: get_state_code + fiitax: income_tax + siitax: state_income_tax + fica: get_fica + frate: federal_mtr + srate: state_mtr + ficar: placeholder + tfica: taxsim_tfica + v10: adjusted_gross_income + v11: tax_unit_taxable_unemployment_compensation + v12: tax_unit_taxable_social_security + v13: basic_standard_deduction + v14: exemptions + v15: placeholder + v16: placeholder + v17: taxable_income_deductions + v18: taxable_income + v19: income_tax + v20: placeholder + v21: placeholder + v22: ctc + v23: refundable_ctc + v24: cdcc + v25: eitc + v26: amt_income + v27: alternative_minimum_tax + v28: income_tax_before_refundable_credits + v29: placeholder + v30: household_net_income + v31: placeholder + v32: state_agi + v33: placeholder + v34: state_standard_deduction + v35: state_itemized_deductions + v36: state_taxable_income + v37: property_tax_credit + v38: child_care_credit + v39: placeholder + v40: placeholder + v41: placeholder + v42: self_employment_income + v43: net_investment_income_tax + v44: employee_medicare_tax + v45: rrc_cares + +policyengine_to_taxsim: + # This section would be the inverse of the above mapping + # It's left empty for brevity, but you should populate it + # with the inverse relationships for bidirectional conversion \ No newline at end of file diff --git a/policyengine_taxsim/core/__init__.py b/policyengine_taxsim/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/policyengine_taxsim/core/input_mapper.py b/policyengine_taxsim/core/input_mapper.py new file mode 100644 index 0000000..f5b3151 --- /dev/null +++ b/policyengine_taxsim/core/input_mapper.py @@ -0,0 +1,35 @@ +from policyengine_taxsim.core.utils import ( + load_variable_mappings, + get_state_code, +) + + +def import_single_household(taxsim_vars): + """ + Convert TAXSIM input variables to a PolicyEngine situation. + + Args: + taxsim_vars (dict): Dictionary of TAXSIM input variables + + Returns: + dict: PolicyEngine situation dictionary + """ + mappings = load_variable_mappings()["taxsim_to_policyengine"] + + year = str(int(taxsim_vars["year"])) # Ensure year is an integer string + state = get_state_code(taxsim_vars["state"]) + + situation = { + "people": { + "you": { + "age": {year: int(taxsim_vars.get("page", 40))}, + "employment_income": {year: int(taxsim_vars.get("pwages", 0))}, + } + }, + "households": { + "your household": {"members": ["you"], "state_name": {year: state}} + }, + "tax_units": {"your tax unit": {"members": ["you"]}}, + } + + return situation diff --git a/policyengine_taxsim/core/output_mapper.py b/policyengine_taxsim/core/output_mapper.py new file mode 100644 index 0000000..11a29ff --- /dev/null +++ b/policyengine_taxsim/core/output_mapper.py @@ -0,0 +1,53 @@ +from policyengine_taxsim.core.utils import ( + load_variable_mappings, + get_state_number, +) +from policyengine_us import Simulation + + +def export_single_household(policyengine_situation): + """ + Convert a PolicyEngine situation to TAXSIM output variables. + + Args: + policyengine_situation (dict): PolicyEngine situation dictionary + + Returns: + dict: Dictionary of TAXSIM output variables + """ + mappings = load_variable_mappings()["policyengine_to_taxsim"] + + simulation = Simulation(situation=policyengine_situation) + + year = list( + policyengine_situation["households"]["your household"][ + "state_name" + ].keys() + )[0] + state_name = policyengine_situation["households"]["your household"][ + "state_name" + ][year] + + taxsim_output = { + "taxsimid": policyengine_situation.get("taxsimid", 1), + "year": int(year), + "state": get_state_number(state_name), + "mstat": policyengine_situation["tax_units"]["your tax unit"] + .get("marital_status", {}) + .get(year, 1), + "page": policyengine_situation["people"]["you"]["age"][year], + "sage": policyengine_situation["people"] + .get("your spouse", {}) + .get("age", {}) + .get(year, 0), + "fiitax": simulation.calculate("income_tax", period=year), + "siitax": simulation.calculate("state_income_tax", period=year), + "fica": simulation.calculate( + "employee_social_security_tax", period=year + ) + + simulation.calculate("employee_medicare_tax", period=year), + } + + # Add more variables as needed to match TAXSIM output + + return taxsim_output diff --git a/policyengine_taxsim/core/utils.py b/policyengine_taxsim/core/utils.py new file mode 100644 index 0000000..eb95b5c --- /dev/null +++ b/policyengine_taxsim/core/utils.py @@ -0,0 +1,90 @@ +import yaml +from pathlib import Path + + +def load_variable_mappings(): + """Load variable mappings from YAML file.""" + config_path = ( + Path(__file__).parent.parent / "config" / "variable_mappings.yaml" + ) + with open(config_path, "r") as f: + return yaml.safe_load(f) + + +STATE_MAPPING = { + 1: "AL", + 2: "AK", + 3: "AZ", + 4: "AR", + 5: "CA", + 6: "CO", + 7: "CT", + 8: "DE", + 9: "DC", + 10: "FL", + 11: "GA", + 12: "HI", + 13: "ID", + 14: "IL", + 15: "IN", + 16: "IA", + 17: "KS", + 18: "KY", + 19: "LA", + 20: "ME", + 21: "MD", + 22: "MA", + 23: "MI", + 24: "MN", + 25: "MS", + 26: "MO", + 27: "MT", + 28: "NE", + 29: "NV", + 30: "NH", + 31: "NJ", + 32: "NM", + 33: "NY", + 34: "NC", + 35: "ND", + 36: "OH", + 37: "OK", + 38: "OR", + 39: "PA", + 40: "RI", + 41: "SC", + 42: "SD", + 43: "TN", + 44: "TX", + 45: "UT", + 46: "VT", + 47: "VA", + 48: "WA", + 49: "WV", + 50: "WI", + 51: "WY", +} + + +def get_state_code(state_number): + """Convert state number to state code.""" + return STATE_MAPPING.get(state_number, "Invalid state number") + + +def get_state_number(state_code): + """Convert state code to state number.""" + state_mapping_reverse = {v: k for k, v in STATE_MAPPING.items()} + return state_mapping_reverse.get( + state_code, 0 + ) # Return 0 for invalid state codes + + +def is_date(string): + """Check if a string represents a valid year.""" + try: + year = int(string) + return ( + 1900 <= year <= 2100 + ) # Assuming years between 1900 and 2100 are valid + except ValueError: + return False diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d7d8e5a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,74 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "policyengine-taxsim" +version = "0.1.0" +description = "A TAXSIM emulator using PolicyEngine US" +readme = "README.md" +requires-python = ">=3.10" +license = {file = "LICENSE"} +authors = [ + {name = "Al Noman", email = "al@policyengine.org"}, +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "policyengine-us", + "pandas", + "PyYAML", + "click", + "matplotlib", +] + +[project.urls] +Homepage = "https://github.com/PolicyEngine/policyengine-taxsim" +"Bug Tracker" = "https://github.com/PolicyEngine/policyengine-taxsim/issues" + +[project.scripts] +policyengine-taxsim = "policyengine_taxsim.cli:main" + +[tool.hatch.build.targets.wheel] +packages = ["policyengine_taxsim"] + +[tool.hatch.build.targets.wheel.shared-data] +"resources/taxsim35" = "share/policyengine_taxsim/taxsim35" + +[tool.hatch.envs.default] +dependencies = [ + "pytest", + "pytest-cov", +] +[tool.hatch.envs.default.scripts] +test = "pytest tests/ {args}" +test-cov = "pytest tests/ --cov-report=term-missing --cov-config=pyproject.toml --cov=policyengine_taxsim {args}" +cov-report = ["- coverage combine", "coverage report"] + +[tool.coverage.run] +branch = true +parallel = true +omit = [ + "policyengine_taxsim/__init__.py", +] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra -q" +testpaths = [ + "tests", +] \ No newline at end of file diff --git a/resources/taxsim35/taxsim35-osx.exe b/resources/taxsim35/taxsim35-osx.exe new file mode 100755 index 0000000..216eeb4 Binary files /dev/null and b/resources/taxsim35/taxsim35-osx.exe differ diff --git a/resources/taxsim35/taxsim35-unix.exe b/resources/taxsim35/taxsim35-unix.exe new file mode 100644 index 0000000..58097f2 Binary files /dev/null and b/resources/taxsim35/taxsim35-unix.exe differ diff --git a/resources/taxsim35/taxsim35-windows.exe b/resources/taxsim35/taxsim35-windows.exe new file mode 100644 index 0000000..4009a76 Binary files /dev/null and b/resources/taxsim35/taxsim35-windows.exe differ diff --git a/resources/taxsim35/taxsim_input.csv b/resources/taxsim35/taxsim_input.csv new file mode 100644 index 0000000..2b023cf --- /dev/null +++ b/resources/taxsim35/taxsim_input.csv @@ -0,0 +1,3 @@ +year,state,depx,pwages,mstat,mortgage,taxsimid,idtl +2021,3,0,49000,1,0,1,2 +2021,3,0,47000,1,0,1,2 \ No newline at end of file diff --git a/taxsim/__init__.py b/taxsim/__init__.py deleted file mode 100644 index eee0ce2..0000000 --- a/taxsim/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .input_mapper import import_single_household -from .output_mapper import export_single_household \ No newline at end of file diff --git a/taxsim/input_mapper.py b/taxsim/input_mapper.py deleted file mode 100644 index 557a718..0000000 --- a/taxsim/input_mapper.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Scripts to transform taxsim input into policyengine inputs -""" - -def import_single_household( taxsim_vars : dict ) -> dict : - """ - Takes taxsim variables and converts into a policyengine 'situation' - Args: - taxsim_vars : dict with named taxsim variables - Returns: - structured dict to match policyengine Simulation() input - """ - output = dict() - return output - - diff --git a/taxsim/output_mapper.py b/taxsim/output_mapper.py deleted file mode 100644 index 910a677..0000000 --- a/taxsim/output_mapper.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -Scripts to transform policyengine variables to taxsim -""" - -def export_single_household( policyengine_situation : dict ) -> dict : - """ - Takes policyengine 'situation' and converts to taxsim variables - Args: - policyengine_situation : structured dict with policyengine household - Returns: - dict with named taxsim variables - """ - output = dict() - return output \ No newline at end of file diff --git a/taxsim_input.raw b/taxsim_input.raw deleted file mode 100644 index b004076..0000000 --- a/taxsim_input.raw +++ /dev/null @@ -1,3 +0,0 @@ -Taxsimid,year,mstat,pwages,state,page,idtl,psemp,stcg,proptax,swages,sage,depx,age1,age2 -1,2024,2,100000,5,40,2,50000,1000,5000,50000,35,2,10,15 -2,2024,1,50000,12,35,,,,,,,,, \ No newline at end of file diff --git a/tests.py b/tests.py deleted file mode 100644 index cc7cb84..0000000 --- a/tests.py +++ /dev/null @@ -1,340 +0,0 @@ -""" -Testing framework for taxsim package -""" -import taxsim -import pickle - -# Test 1 input -situation_1 = { - "people": { - "you": { - "age": { - "2024": 40 - }, - "employment_income": { - "2024": 100000 - } - }, - "your first dependent": { - "age": { - "2024": 10 - }, - "employment_income": { - "2024": 0 - } - }, - "your second dependent": { - "age": { - "2024": 10 - }, - "employment_income": { - "2024": 0 - } - } - }, - "families": { - "your family": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "marital_units": { - "your marital unit": { - "members": [ - "you" - ] - }, - "your first dependent's marital unit": { - "members": [ - "your first dependent" - ], - "marital_unit_id": { - "2024": 1 - } - }, - "your second dependent's marital unit": { - "members": [ - "your second dependent" - ], - "marital_unit_id": { - "2024": 2 - } - } - }, - "tax_units": { - "your tax unit": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "spm_units": { - "your household": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "households": { - "your household": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ], - "state_name": { - "2024": "CA" - } - } - } -} - -# Test 2 input -situation_2 = { - "people": { - "you": { - "age": { - "2024": 40 - }, - "employment_income": { - "2024": 100000 - } - }, - "your first dependent": { - "age": { - "2024": 10 - }, - "employment_income": { - "2024": 0 - } - }, - "your second dependent": { - "age": { - "2024": 10 - }, - "employment_income": { - "2024": 0 - } - } - }, - "families": { - "your family": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "marital_units": { - "your marital unit": { - "members": [ - "you" - ] - }, - "your first dependent's marital unit": { - "members": [ - "your first dependent" - ], - "marital_unit_id": { - "2024": 1 - } - }, - "your second dependent's marital unit": { - "members": [ - "your second dependent" - ], - "marital_unit_id": { - "2024": 2 - } - } - }, - "tax_units": { - "your tax unit": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "spm_units": { - "your household": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "households": { - "your household": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ], - "state_name": { - "2024": "PA" - } - } - } -} - - -tests = { 'test1' : - { 'taxsim_vars' : dict() - , 'situation' : situation_1 - } - , 'test2' : - { 'taxsim_vars' : dict() - , 'situation' : situation_2 - } - - } -tests_filename = 'saved_tests.pickle' - - -def reset_tests( testname : str = None ) -> None : - """ - Runs converter and saves to pickle file names saved_tests.pickle - Args: - testname : identifier of test to reset, or None for all - - Returns: - None, but overwrites test saving file - """ - # TBD: - if( testname != None ) : - raise( NotImplementedError('Not working for individual tests yet.') ) - else : - print("[INFO] Resetting all tests.") - - # Run the tests and save results - for name, test_items in tests.items() : - # TBD: Do import step and save that output - # i.e. situation = taxsim.import_single_household( taxsim_vars ) - # For now using 'situation' vars - situation = test_items['situation'] - - test_items['taxsim_results'] = taxsim.export_single_household( situation ) - print( f"[INFO] Finished test: {name}") - - with open(tests_filename, "wb") as f: - pickle.dump(tests, f) - - print( f"[INFO] Saved all tests to {tests_filename}" ) - - -def check_tests( testname : str = None ) -> bool : - """ - Runs converter and checks results against saved_tests.pickle - Args: - testname : identifier of test to reset, or None for all - - Returns: - True if tests match - """ - # TBD: - if( testname != None ) : - raise( NotImplementedError('Not working for individual tests yet.')) - else : - print( "[INFO] Checking all tests.") - - # Load the old tests - with open(tests_filename, "rb") as f: - old_tests = pickle.load(f) - - # Run the tests and save results - is_different = False - for name, test_results in tests.items() : - # TBD: Do import step and save that output - # i.e. situation = taxsim.import_single_household( taxsim_vars ) - # For now using 'situation' vars - situation = test_results['situation'] - test_results['taxsim_results'] = taxsim.export_single_household( situation ) - print( f"[INFO] Finished running test: {name}") - - # Compare top-level dictionaries - differences = _compare_nested_dicts(test_results, old_tests[name]) - - # Print differences if any - if differences: - print(f"Differences found between dictionaries:") - _compare_dicts(test_results, old_tests[name], print_details=True) # Recursive call for detailed output - is_different = True - print( f"[INFO] Finished checking test: {name}") - - print( f"[INFO] Completed checks." ) - return is_different - - -def _compare_nested_dicts(dict1_val, dict2_val): - """ - Compare two dictionaries by drilling down into each - Args: - dict1_val : one dictionary - dict2_val : other dictionary - Returns: - set of differences or None if none - """ - # Base case: If both values are not dictionaries, compare directly - if not isinstance(dict1_val, dict) or not isinstance(dict2_val, dict): - return dict1_val != dict2_val - - # Recursive case: Compare nested dictionaries - missing_keys = set(dict2_val.keys()) - set(dict1_val.keys()) - extra_keys = set(dict1_val.keys()) - set(dict2_val.keys()) - value_diffs = { key: dict2_val.get(key, None) - for key in dict1_val.keys() if _compare_nested_dicts(dict1_val[key], dict2_val.get(key, None)) - } - return any(missing_keys or extra_keys or value_diffs) - - -def _compare_dicts(newdict, olddict, print_details : bool = False ): - """ - Show differences between the new and old dictionaries - Args: - newdict : new values - olddict : old values - print_details : whether to print the differences True/False - """ - # Identify missing and extra keys - missing_keys = set(olddict.keys()) - set(newdict.keys()) - extra_keys = set(newdict.keys()) - set(olddict.keys()) - - # Print top-level missing/extra keys - if print_details and (missing_keys or extra_keys): - if missing_keys: - print(f"\tMissing keys: {missing_keys}") - if extra_keys: - print(f"\tExtra keys: {extra_keys}") - - # Compare values for existing keys - value_diffs = {key: olddict.get(key, None) - for key in newdict.keys() if newdict[key] != olddict.get(key, None) - } - if print_details and value_diffs: - print(f"\tKeys with different values:") - for key, value in value_diffs.items(): - print(f"\t\t- {key}: New value = {newdict[key]}" ) - print(f"\t\t \t : Old value = {value}") - # Recursively compare nested dictionaries - for key, val in newdict.items(): - if isinstance(val, dict) and key in olddict: - _compare_dicts(val, olddict[key], print_details=True) # Recursive call with details - - - - -# Run to check -if( __name__ == "__main__") : - reset_tests() - check_tests() \ No newline at end of file diff --git a/tests/test_e2e.py b/tests/test_e2e.py new file mode 100644 index 0000000..0181133 --- /dev/null +++ b/tests/test_e2e.py @@ -0,0 +1,124 @@ +import unittest +import os +import subprocess +import pandas as pd +import numpy as np +from pathlib import Path +import platform +import sys + + +class E2ETest(unittest.TestCase): + + def setUp(self) -> None: + self.project_root = Path(__file__).parent.parent + self.taxsim_dir = self.project_root / "resources" / "taxsim35" + self.output_dir = self.project_root / "output" + self.output_dir.mkdir(exist_ok=True) + + # Determine the correct TAXSIM executable based on the OS + system = platform.system().lower() + if system == "darwin": + self.taxsim_exe = "taxsim35-osx.exe" + elif system == "windows": + self.taxsim_exe = "taxsim35-windows.exe" + elif system == "linux": + self.taxsim_exe = "taxsim35-unix.exe" + else: + raise OSError(f"Unsupported operating system: {system}") + + self.input_file = self.taxsim_dir / "taxsim_input.csv" + + def test_generate_policyengine_taxsim(self): + output_file = self.output_dir / "policyengine_taxsim_output.csv" + + cmd = f"{sys.executable} {self.project_root}/policyengine_taxsim/cli.py {self.input_file} -o {output_file}" + process = subprocess.run( + cmd, shell=True, capture_output=True, text=True + ) + + print(f"PolicyEngine TAXSIM CLI output:\n{process.stdout}") + if process.returncode != 0: + print( + f"PolicyEngine TAXSIM CLI failed with error:\n{process.stderr}" + ) + raise Exception( + f"PolicyEngine TAXSIM CLI failed: {process.returncode}" + ) + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_generate_taxsim_output(self): + output_file = self.output_dir / "taxsim35_output.csv" + + taxsim_path = self.taxsim_dir / self.taxsim_exe + + if platform.system().lower() != "windows": + # Make the file executable on Unix-like systems + os.chmod(taxsim_path, 0o755) + + cmd = f"{taxsim_path} < {self.input_file} > {output_file}" + process = subprocess.run( + cmd, shell=True, capture_output=True, text=True + ) + + print(f"TAXSIM35 output:\n{process.stdout}") + if process.returncode != 0: + print(f"TAXSIM35 failed with error:\n{process.stderr}") + raise Exception(f"TAXSIM35 failed: {process.returncode}") + + self.assertTrue(output_file.is_file()) + print(f"Content of {output_file}:") + with open(output_file, "r") as f: + print(f.read()) + + def test_match_both_output(self): + taxsim35_csv = pd.read_csv(self.output_dir / "taxsim35_output.csv") + pe_taxsim_csv = pd.read_csv( + self.output_dir / "policyengine_taxsim_output.csv" + ) + + print("TAXSIM35 output:") + print(taxsim35_csv) + print("\nPolicyEngine TAXSIM output:") + print(pe_taxsim_csv) + + # Ensure both DataFrames have the same columns + common_columns = set(taxsim35_csv.columns) & set(pe_taxsim_csv.columns) + taxsim35_csv = taxsim35_csv[list(common_columns)] + pe_taxsim_csv = pe_taxsim_csv[list(common_columns)] + + # Ensure both DataFrames have the same column names + taxsim35_csv.columns = taxsim35_csv.columns.str.lower() + pe_taxsim_csv.columns = pe_taxsim_csv.columns.str.lower() + + # Sort both DataFrames by taxsimid to ensure rows are in the same order + taxsim35_csv = taxsim35_csv.sort_values("taxsimid").reset_index( + drop=True + ) + pe_taxsim_csv = pe_taxsim_csv.sort_values("taxsimid").reset_index( + drop=True + ) + + # Convert numeric columns to float + numeric_columns = taxsim35_csv.select_dtypes( + include=["number"] + ).columns + for col in numeric_columns: + taxsim35_csv[col] = pd.to_numeric( + taxsim35_csv[col], errors="coerce" + ) + pe_taxsim_csv[col] = pd.to_numeric( + pe_taxsim_csv[col], errors="coerce" + ) + + # Compare year matched + year_matched = (taxsim35_csv['year'] == pe_taxsim_csv['year']).all() + self.assertTrue(year_matched, "year do not match") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_mappers.py b/tests/test_mappers.py new file mode 100644 index 0000000..320033c --- /dev/null +++ b/tests/test_mappers.py @@ -0,0 +1,72 @@ +import pytest +from policyengine_taxsim.core.input_mapper import import_single_household +from policyengine_taxsim.core.output_mapper import export_single_household + + +def test_import_single_household(): + taxsim_input = { + "year": 2022, + "state": 6, # Colorado + "page": 35, + "pwages": 50000, + } + + expected_output = { + "people": { + "you": {"age": {"2022": 35}, "employment_income": {"2022": 50000}} + }, + "households": { + "your household": { + "members": ["you"], + "state_name": {"2022": "CO"}, + } + }, + "tax_units": {"your tax unit": {"members": ["you"]}}, + } + + result = import_single_household(taxsim_input) + assert result == expected_output + + +def test_export_single_household(): + policyengine_situation = { + "people": { + "you": {"age": {"2022": 35}, "employment_income": {"2022": 50000}} + }, + "households": { + "your household": { + "members": ["you"], + "state_name": {"2022": "CO"}, + } + }, + "tax_units": {"your tax unit": {"members": ["you"]}}, + } + + result = export_single_household(policyengine_situation) + + assert result["year"] == 2022 + assert result["state"] == 6 # Colorado + assert "fiitax" in result + assert "siitax" in result + # Note: We can't easily predict the exact tax values without mocking the PolicyEngine simulation, + # so we're just checking that these keys exist in the output. + + +@pytest.fixture +def sample_taxsim_input(): + return {"year": 2022, "state": 6, "page": 35, "pwages": 50000} # Colorado + + +def test_roundtrip(sample_taxsim_input): + # Import TAXSIM input to PolicyEngine situation + pe_situation = import_single_household(sample_taxsim_input) + + # Export PolicyEngine situation back to TAXSIM output + taxsim_output = export_single_household(pe_situation) + + # Check that key information is preserved + assert taxsim_output["year"] == sample_taxsim_input["year"] + assert taxsim_output["state"] == sample_taxsim_input["state"] + # Again, we can't easily check the exact tax values, but we can ensure they exist + assert "fiitax" in taxsim_output + assert "siitax" in taxsim_output