Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into validate_outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreenbury committed Oct 11, 2024
2 parents 7850470 + 34b0291 commit baf61c8
Show file tree
Hide file tree
Showing 27 changed files with 8,742 additions and 2,486 deletions.
11 changes: 11 additions & 0 deletions config/base.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
[parameters]
seed = 0
region = "leeds"
number_of_households = 10000
zone_id = "OA21CD"
travel_times = true # Only set to true if you have a travel time matrix at the level specified in boundary_geography
boundary_geography = "OA"


[work_assignment]
use_percentages = true
weight_max_dev = 0.2
weight_total_dev = 0.8
max_zones = 8
14 changes: 14 additions & 0 deletions config/base_500.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[parameters]
seed = 0
region = "leeds"
number_of_households = 500
zone_id = "OA21CD"
travel_times = true # Only set to true if you have a travel time matrix at the level specified in boundary_geography
boundary_geography = "OA"


[work_assignment]
use_percentages = true
weight_max_dev = 0.2
weight_total_dev = 0.8
max_zones = 8
13 changes: 13 additions & 0 deletions config/base_5000.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[parameters]
seed = 0
region = "leeds"
number_of_households = 5000
zone_id = "OA21CD"
travel_times = true # Only set to true if you have a travel time matrix at the level specified in boundary_geography
boundary_geography = "OA"

[work_assignment]
use_percentages = true
weight_max_dev = 0.2
weight_total_dev = 0.8
max_zones = 8
13 changes: 13 additions & 0 deletions config/base_all.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[parameters]
seed = 0
region = "leeds"
zone_id = "OA21CD"
travel_times = true # Only set to true if you have a travel time matrix at the level specified in boundary_geography
boundary_geography = "OA"


[work_assignment]
use_percentages = false
weight_max_dev = 0.0
weight_total_dev = 1.0
max_zones = 4
5 changes: 0 additions & 5 deletions notebooks/R/.gitignore

This file was deleted.

24 changes: 0 additions & 24 deletions notebooks/R/main.R

This file was deleted.

5,552 changes: 5,552 additions & 0 deletions notebooks/Validation_SPC_with Census.ipynb

Large diffs are not rendered by default.

309 changes: 307 additions & 2 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ click = "^8.1.7"
tomlkit = "^0.13.0"
cml-pam = "0.3.2"
gdal = "<=3.8.4"
pandera = "^0.20.4"

[tool.poetry.dev-dependencies]
pytest = ">= 6"
Expand Down
78 changes: 78 additions & 0 deletions scripts/0_preprocess_inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import geopandas as gpd
import pandas as pd
from uatk_spc import Reader

import acbm
from acbm.cli import acbm_cli
from acbm.config import load_config
from acbm.logger_config import preprocessing_logger as logger
from acbm.preprocessing import edit_boundary_resolution


@acbm_cli
def main(config_file):
    """Build the study-area boundary file from the England-wide boundary layer.

    Steps, in order:
      1. Read ``oa_england.geojson`` and reproject it to EPSG:4326.
      2. Pass it through ``edit_boundary_resolution`` for the configured
         ``boundary_geography`` and ``zone_id``.
      3. Keep only the zones whose ``MSOA21CD`` maps (via the MSOA 2011 -> 2021
         lookup CSV) to an ``MSOA11CD`` present in the SPC output for the region.
      4. Write the result to ``study_area_zones.geojson``.

    Args:
        config_file: Config location, handed unchanged to ``load_config``.
    """
    config = load_config(config_file)
    config.init_rng()
    region = config.region
    # Directory holding the saved SPC output for the available regions
    spc_path = acbm.root_path / "data/external/spc_output/raw/"

    # ----- BOUNDARIES
    logger.info("Preprocessing Boundary Layer")

    ## --- Load the England-wide layer and reproject it
    logger.info("1. Reading in the boundary layer for the whole of England")

    england_path = acbm.root_path / "data/external/boundaries/oa_england.geojson"
    boundaries = gpd.read_file(england_path).to_crs(epsg=4326)

    ## --- Adjust the boundary resolution (original note: can only be OA or MSOA)
    boundary_geography = config.parameters.boundary_geography
    logger.info(f"2. Dissolving boundaries to {boundary_geography} level")

    boundaries = edit_boundary_resolution(
        study_area=boundaries,
        geography=boundary_geography,
        zone_id=config.zone_id,
    )

    ## --- Restrict to the study area
    # Filtering is done on MSOA21CD, which (per the original note) exists
    # regardless of the boundary resolution.
    logger.info("3. Filtering boundaries to specified study area")

    # Zones known to SPC for this region (per the original note: 2011 MSOAs)
    spc = Reader(spc_path, region, backend="pandas")
    zones_in_region = list(spc.info_per_msoa.keys())

    # Translate the 2011 codes to 2021 codes through the lookup table
    lookup_path = (
        acbm.root_path
        / "data/external/MSOA_2011_MSOA_2021_Lookup_for_England_and_Wales.csv"
    )
    msoa_lookup = pd.read_csv(lookup_path)
    in_region = msoa_lookup["MSOA11CD"].isin(zones_in_region)
    msoa21cd_values = msoa_lookup.loc[in_region, "MSOA21CD"].tolist()

    # Keep only the boundary rows whose MSOA21CD is one of the translated codes
    keep_rows = boundaries["MSOA21CD"].isin(msoa21cd_values)
    boundaries_filtered = boundaries[keep_rows]

    ## --- Save the output as GeoJSON
    logger.info(
        f"4. Saving the boundaries to {acbm.root_path / 'data/external/boundaries/'} path"
    )

    output_path = acbm.root_path / "data/external/boundaries/study_area_zones.geojson"
    boundaries_filtered.to_file(output_path, driver="GeoJSON")


if __name__ == "__main__":
main()
24 changes: 7 additions & 17 deletions scripts/1_prep_synthpop.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,18 @@
import click
import numpy as np
import tomlkit
from uatk_spc.builder import Builder

import acbm
from acbm.cli import acbm_cli
from acbm.config import load_config


@click.command()
# TODO: add override for case when seed provided from CLI
# @click.option("--seed", default=1, help="Seed for random state", type=int)
@click.option("--config", prompt="Filepath relative to repo root of config", type=str)
def main(config):
# Read config
with open(acbm.root_path / config, "rb") as f:
config_dict = tomlkit.load(f)
seed = config_dict["parameters"]["seed"]
region = config_dict["parameters"]["region"]

# Seed RNG
np.random.seed(seed)
@acbm_cli
def main(config_file):
config = load_config(config_file)
config.init_rng()
region = config.region

# Pick a region with SPC output saved
path = acbm.root_path / "data/external/spc_output/raw/"
region = "leeds"

# Add people and households
spc_people_hh = (
Expand Down
Loading

0 comments on commit baf61c8

Please sign in to comment.