Urban-Analytics-Technology-Platform · sgreenbury · Nov 1, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/.gitignore b/.gitignore
@@ -167,3 +167,9 @@ logs/
 
 # pyright config
 pyrightconfig.json
+
+# scratch
+notebooks/scratch*
+
+# AcBM config
+config/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
       - id: trailing-whitespace
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.2.0"
+    rev: "v0.7.0"
     hooks:
       # first, lint + autofix
       - id: ruff

diff --git a/config/base.toml b/config/base.toml
@@ -1,11 +1,21 @@
 [parameters]
 seed = 0
 region = "leeds"
-number_of_households = 10000
+number_of_households = 5000
 zone_id = "OA21CD"
-travel_times = true # Only set to true if you have travel time matrix at the level specified in boundary_geography
+travel_times = true         # Only set to true if you have a travel time matrix at the level specified in boundary_geography
 boundary_geography = "OA"
 
+[matching]
+required_columns = ["number_adults", "number_children"]
+optional_columns = [
+    "number_cars",
+    "num_pension_age",
+    "rural_urban_2_categories",
+    "employment_status",
+    "tenure_status",
+]
+n_matches = 10
 
 [work_assignment]
 use_percentages = true

diff --git a/config/base_500.toml b/config/base_500.toml
diff --git a/config/base_5000.toml b/config/base_5000.toml
diff --git a/config/base_all.toml b/config/base_all.toml
diff --git a/notebooks/2.1_sandbox-match_households.ipynb b/notebooks/2.1_sandbox-match_households.ipynb
@@ -16,7 +16,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -54,7 +54,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -75,7 +75,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -89,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -111,7 +111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -128,7 +128,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -180,7 +180,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -228,7 +228,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -276,7 +276,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -299,7 +299,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -319,7 +319,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -471,7 +471,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -530,7 +530,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -610,7 +610,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -789,7 +789,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -879,7 +879,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -945,7 +945,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -986,7 +986,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [

diff --git a/notebooks/2_match_households_and_individuals.ipynb b/notebooks/2_match_households_and_individuals.ipynb
diff --git a/scripts/1_prep_synthpop.py b/scripts/1_prep_synthpop.py
@@ -27,35 +27,6 @@ def main(config_file):
         acbm.root_path / f"data/external/spc_output/{region}_people_hh.parquet"
     )
 
-    # People and time-use data
-    # Subset of (non-time-use) features to include and unnest
-    # The features can be found here: https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto
-    features = {
-        "health": [
-            "bmi",
-            "has_cardiovascular_disease",
-            "has_diabetes",
-            "has_high_blood_pressure",
-            "self_assessed_health",
-            "life_satisfaction",
-        ],
-        "demographics": ["age_years", "ethnicity", "sex", "nssec8"],
-        "employment": ["sic1d2007", "sic2d2007", "pwkstat", "salary_yearly"],
-    }
-
-    # build the table
-    spc_people_tu = (
-        Builder(path, region, backend="polars", input_type="parquet")
-        .add_households()
-        .add_time_use_diaries(features, diary_type="weekday_diaries")
-        .build()
-    )
-
-    # save the output
-    spc_people_tu.write_parquet(
-        acbm.root_path / f"data/external/spc_output/{region}_people_tu.parquet"
-    )
-
 
 if __name__ == "__main__":
     main()