From 6db580da67acb5bab3f15ee3e07bdb3ebc4ad2e9 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Fri, 26 Apr 2024 17:51:42 +0100 Subject: [PATCH 01/16] Update notebook 1 and add script --- notebooks/1_prep_synthpop.ipynb | 32 ++++-------- scripts/1_prep_synthpop.py | 93 +++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 scripts/1_prep_synthpop.py diff --git a/notebooks/1_prep_synthpop.ipynb b/notebooks/1_prep_synthpop.ipynb index 23cc5a4..241f07a 100644 --- a/notebooks/1_prep_synthpop.ipynb +++ b/notebooks/1_prep_synthpop.ipynb @@ -76,34 +76,13 @@ "spc_people_hh = (\n", " Builder(path, region, backend=\"pandas\", input_type=\"parquet\")\n", " .add_households()\n", - " .unnest([\"health\", \"employment\", \"details\"])\n", - " # remove nssec column\n", + " .unnest([\"health\", \"employment\", \"details\", \"demographics\"], rsuffix=\"_household\")\n", " .build()\n", ")\n", "\n", "spc_people_hh.head(5)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# we need to unnest the demographic data. 
If we do this above\n", - "# we get an error as there will be two \"nssec8\" columns.\n", - "\n", - "# Unnest the JSON column\n", - "demographics = pd.json_normalize(spc_people_hh['demographics'])\n", - "\n", - "# Remove the columns we don't want\n", - "spc_people_hh = spc_people_hh.drop(['demographics', 'nssec8'], axis = 1)\n", - "# Add the unnested demographics column\n", - "spc_people_hh = pd.concat([spc_people_hh, demographics], axis = 1)\n", - "\n", - "spc_people_hh.head()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -194,6 +173,13 @@ "# save the output\n", "spc_people_tu.write_parquet('../data/external/spc_output/' + region + '_people_tu.parquet')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -212,7 +198,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/scripts/1_prep_synthpop.py b/scripts/1_prep_synthpop.py new file mode 100644 index 0000000..024f975 --- /dev/null +++ b/scripts/1_prep_synthpop.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# coding: utf-8 + +# ## Preparing the Synthetic Population + +# We will use the spc package for our synthetic population. To add it as a dependancy in this virtual environment, I ran `poetry add git+https://github.com/alan-turing-institute/uatk-spc.git@55-output-formats-python#subdirectory=python`. The branch may change if the python package is merged into the main spc branch. 
+ +# import json +import pandas as pd + +# https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/examples/spc_builder_example.ipynb +from uatk_spc.builder import Builder + + +# ### Loading in the SPC synthetic population +# +# I use the code in the `Quickstart` [here](https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/README.md) to get a parquet file and convert it to JSON. +# +# You have two options: +# +# +# 1- Slow and memory-hungry: Download the pbf file directly from [here](https://alan-turing-institute.github.io/uatk-spc/using_england_outputs.html) and load in the pbf file with the python package +# +# 2- Faster: Covert the pbf file to parquet, and then load it using the python package. To convert to parquet, you need to: +# +# a. clone the [uatk-spc](https://github.com/alan-turing-institute/uatk-spc/tree/main/docs) +# +# b. Run `cargo run --release -- --rng-seed 0 --flat-output config/England/west-yorkshire.txt --year 2020` and replace `west-yorkshire` and `2020` with your preferred option +# + +# Pick a region with SPC output saved +path = "../data/external/spc_output/raw/" +region = "west-yorkshire" + + +# #### People and household data + +# add people and households +spc_people_hh = ( + Builder(path, region, backend="pandas", input_type="parquet") + .add_households() + .unnest(["health", "employment", "details", "demographics"], rsuffix="_household") + .build() +) + +spc_people_hh.head(5) + + +# save the output +spc_people_hh.to_parquet("../data/external/spc_output/" + region + "_people_hh.parquet") + + +spc_people_hh["salary_yearly"].hist(bins=100) + + +# plt.show() + + +spc_people_hh["salary_yearly"].unique() + + +# #### People and time-use data + +# Subset of (non-time-use) features to include and unnest + +# The features can be found here: https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto +features = { + "health": [ + "bmi", + 
"has_cardiovascular_disease", + "has_diabetes", + "has_high_blood_pressure", + "self_assessed_health", + "life_satisfaction", + ], + "demographics": ["age_years", "ethnicity", "sex", "nssec8"], + "employment": ["sic1d2007", "sic2d2007", "pwkstat", "salary_yearly"], +} + +# build the table +spc_people_tu = ( + Builder(path, region, backend="polars", input_type="parquet") + .add_households() + .add_time_use_diaries(features, diary_type="weekday_diaries") + .build() +) +spc_people_tu.head() + + +# save the output +spc_people_tu.write_parquet( + "../data/external/spc_output/" + region + "_people_tu.parquet" +) From 2f925eb8b5c554aa9686e35e6826c94ea3ed7e6b Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Fri, 26 Apr 2024 18:04:40 +0100 Subject: [PATCH 02/16] Add README for scripts and refactor first script --- scripts/1_prep_synthpop.py | 140 +++++++++++++------------------------ scripts/README.md | 25 +++++++ 2 files changed, 75 insertions(+), 90 deletions(-) create mode 100644 scripts/README.md diff --git a/scripts/1_prep_synthpop.py b/scripts/1_prep_synthpop.py index 024f975..34c8f51 100644 --- a/scripts/1_prep_synthpop.py +++ b/scripts/1_prep_synthpop.py @@ -1,93 +1,53 @@ -#!/usr/bin/env python -# coding: utf-8 - -# ## Preparing the Synthetic Population - -# We will use the spc package for our synthetic population. To add it as a dependancy in this virtual environment, I ran `poetry add git+https://github.com/alan-turing-institute/uatk-spc.git@55-output-formats-python#subdirectory=python`. The branch may change if the python package is merged into the main spc branch. 
- -# import json -import pandas as pd - -# https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/examples/spc_builder_example.ipynb from uatk_spc.builder import Builder -# ### Loading in the SPC synthetic population -# -# I use the code in the `Quickstart` [here](https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/README.md) to get a parquet file and convert it to JSON. -# -# You have two options: -# -# -# 1- Slow and memory-hungry: Download the pbf file directly from [here](https://alan-turing-institute.github.io/uatk-spc/using_england_outputs.html) and load in the pbf file with the python package -# -# 2- Faster: Covert the pbf file to parquet, and then load it using the python package. To convert to parquet, you need to: -# -# a. clone the [uatk-spc](https://github.com/alan-turing-institute/uatk-spc/tree/main/docs) -# -# b. Run `cargo run --release -- --rng-seed 0 --flat-output config/England/west-yorkshire.txt --year 2020` and replace `west-yorkshire` and `2020` with your preferred option -# - -# Pick a region with SPC output saved -path = "../data/external/spc_output/raw/" -region = "west-yorkshire" - - -# #### People and household data - -# add people and households -spc_people_hh = ( - Builder(path, region, backend="pandas", input_type="parquet") - .add_households() - .unnest(["health", "employment", "details", "demographics"], rsuffix="_household") - .build() -) - -spc_people_hh.head(5) - - -# save the output -spc_people_hh.to_parquet("../data/external/spc_output/" + region + "_people_hh.parquet") - - -spc_people_hh["salary_yearly"].hist(bins=100) - - -# plt.show() - - -spc_people_hh["salary_yearly"].unique() - - -# #### People and time-use data - -# Subset of (non-time-use) features to include and unnest - -# The features can be found here: https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto -features = { - "health": [ - "bmi", - 
"has_cardiovascular_disease", - "has_diabetes", - "has_high_blood_pressure", - "self_assessed_health", - "life_satisfaction", - ], - "demographics": ["age_years", "ethnicity", "sex", "nssec8"], - "employment": ["sic1d2007", "sic2d2007", "pwkstat", "salary_yearly"], -} - -# build the table -spc_people_tu = ( - Builder(path, region, backend="polars", input_type="parquet") - .add_households() - .add_time_use_diaries(features, diary_type="weekday_diaries") - .build() -) -spc_people_tu.head() - - -# save the output -spc_people_tu.write_parquet( - "../data/external/spc_output/" + region + "_people_tu.parquet" -) +def main(): + # Pick a region with SPC output saved + path = "../data/external/spc_output/raw/" + region = "west-yorkshire" + + # Add people and households + spc_people_hh = ( + Builder(path, region, backend="pandas", input_type="parquet") + .add_households() + .unnest( + ["health", "employment", "details", "demographics"], rsuffix="_household" + ) + .build() + ) + spc_people_hh.to_parquet( + "../data/external/spc_output/" + region + "_people_hh.parquet" + ) + + # People and time-use data + # Subset of (non-time-use) features to include and unnest + # The features can be found here: https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto + features = { + "health": [ + "bmi", + "has_cardiovascular_disease", + "has_diabetes", + "has_high_blood_pressure", + "self_assessed_health", + "life_satisfaction", + ], + "demographics": ["age_years", "ethnicity", "sex", "nssec8"], + "employment": ["sic1d2007", "sic2d2007", "pwkstat", "salary_yearly"], + } + + # build the table + spc_people_tu = ( + Builder(path, region, backend="polars", input_type="parquet") + .add_households() + .add_time_use_diaries(features, diary_type="weekday_diaries") + .build() + ) + + # save the output + spc_people_tu.write_parquet( + "../data/external/spc_output/" + region + "_people_tu.parquet" + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/README.md 
b/scripts/README.md
new file mode 100644
index 0000000..6a79279
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,25 @@
+# Preparing synthetic population scripts
+
+## Loading in the SPC synthetic population
+
+Use the code in the `Quickstart` [here](https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/README.md)
+to get a parquet file and convert it to JSON.
+
+You have two options:
+1. Slow and memory-hungry: download the `.pb` file directly from [here](https://alan-turing-institute.github.io/uatk-spc/using_england_outputs.html)
+   and load in the pbf file with the python package
+2. Faster: Run SPC to generate parquet outputs, and then load using the SPC toolkit python package. To generate parquet, you need to:
+   1. Clone [uatk-spc](https://github.com/alan-turing-institute/uatk-spc/tree/main/docs)
+   2. Run:
+   ```shell
+   cargo run --release -- \
+    --rng-seed 0 \
+    --flat-output \
+    --year 2020 \
+    config/England/west-yorkshire.txt
+   ```
+   and replace `west-yorkshire` and `2020` with your preferred option. 
+ + +## Matching +TODO From b95bc2fd21908846112fb7973c09d0452f903fa4 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Fri, 26 Apr 2024 18:36:37 +0100 Subject: [PATCH 03/16] Update notebook 2 and convert to script --- .../2_match_households_and_individuals.ipynb | 859 +++++------ scripts/2_match_households_and_individuals.py | 1312 +++++++++++++++++ 2 files changed, 1718 insertions(+), 453 deletions(-) create mode 100644 scripts/2_match_households_and_individuals.py diff --git a/notebooks/2_match_households_and_individuals.ipynb b/notebooks/2_match_households_and_individuals.ipynb index 05e5b85..5eb0f9b 100644 --- a/notebooks/2_match_households_and_individuals.ipynb +++ b/notebooks/2_match_households_and_individuals.ipynb @@ -22,12 +22,15 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", "import pickle as pkl\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", + "from tqdm import tqdm, trange\n", "\n", + "# from tqdm.notebook import trange\n", "from acbm.matching import match_categorical, match_individuals\n", "from acbm.preprocessing import (\n", " count_per_group,\n", @@ -38,7 +41,11 @@ " truncate_values,\n", ")\n", "\n", - "pd.set_option('display.max_columns', None)" + "pd.set_option('display.max_columns', None)\n", + "\n", + "def get_interim_path(file_name: str, path: str=\"../data/interim/matching/\") -> str:\n", + " os.makedirs(path, exist_ok=True)\n", + " return f'{path}/{file_name}'" ] }, { @@ -94,7 +101,6 @@ " id\n", " household\n", " workplace\n", - " location\n", " events\n", " weekday_diaries\n", " weekend_diaries\n", @@ -102,8 +108,8 @@ " id_tus_hh\n", " id_tus_p\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " bmi\n", " has_cardiovascular_disease\n", @@ -119,6 +125,7 @@ " salary_yearly\n", " salary_hourly\n", " hid\n", + " nssec8\n", " accommodation_type\n", " communal_type\n", " num_rooms\n", @@ -128,7 +135,7 @@ " sex\n", " age_years\n", " ethnicity\n", 
- " nssec8\n", + " nssec8_household\n", " \n", " \n", " \n", @@ -137,7 +144,6 @@ " 0\n", " 0\n", " NaN\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " {'concert_f': 1.2791347489984115e-31, 'concert...\n", " [1583, 13161]\n", " [1582, 13160]\n", @@ -163,6 +169,7 @@ " NaN\n", " E02002183_0001\n", " 1.0\n", + " 1.0\n", " NaN\n", " 2.0\n", " True\n", @@ -178,7 +185,6 @@ " 1\n", " 1\n", " NaN\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " {'concert_f': 9.743248151956307e-21, 'concert_...\n", " [2900, 4948, 4972, 7424, 10284, 10586, 12199, ...\n", " [2901, 4949, 4973, 7425, 10285, 10585, 12198, ...\n", @@ -203,6 +209,7 @@ " NaN\n", " NaN\n", " E02002183_0002\n", + " 1.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -219,7 +226,6 @@ " 2\n", " 1\n", " NaN\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " {'concert_f': 8.46716103992468e-16, 'concert_f...\n", " [3010, 6389, 9448, 10184, 11598]\n", " [3011, 6388, 9447, 10183, 11599]\n", @@ -244,6 +250,7 @@ " NaN\n", " NaN\n", " E02002183_0002\n", + " 1.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -260,7 +267,6 @@ " 3\n", " 2\n", " 56126.0\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " {'concert_f': 1.8844366073608398, 'concert_fs'...\n", " [366, 867, 2096, 3678, 5212, 5450, 8145, 9254,...\n", " [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,...\n", @@ -285,6 +291,7 @@ " 32857.859375\n", " 14.360952\n", " E02002183_0003\n", + " 4.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -301,7 +308,6 @@ " 4\n", " 2\n", " NaN\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " {'concert_f': 4.877435207366943, 'concert_fs':...\n", " [1289, 12528, 12870]\n", " [1288, 12529, 12871]\n", @@ -326,6 +332,7 @@ " 18162.451172\n", " 9.439944\n", " E02002183_0003\n", + " 4.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -349,13 +356,6 @@ "3 3 2 56126.0 \n", "4 4 2 NaN \n", "\n", - " location \\\n", - "0 {'x': -1.7892179489135742, 'y': 53.91915130615... 
\n", - "1 {'x': -1.8262380361557007, 'y': 53.92028045654... \n", - "2 {'x': -1.8262380361557007, 'y': 53.92028045654... \n", - "3 {'x': -1.8749940395355225, 'y': 53.94298934936... \n", - "4 {'x': -1.8749940395355225, 'y': 53.94298934936... \n", - "\n", " events \\\n", "0 {'concert_f': 1.2791347489984115e-31, 'concert... \n", "1 {'concert_f': 9.743248151956307e-21, 'concert_... \n", @@ -377,7 +377,7 @@ "3 [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,... E02002183_0003_001 \n", "4 [1288, 12529, 12871] E02002183_0003_002 \n", "\n", - " id_tus_hh id_tus_p pid_hs msoa oa members bmi \\\n", + " id_tus_hh id_tus_p pid_hs msoa11cd oa11cd members bmi \\\n", "0 11291218 1 2905399 E02002183 E00053954 [0] 24.879356 \n", "1 17291219 1 2905308 E02002183 E00053953 [1, 2] 27.491207 \n", "2 17070713 2 2907681 E02002183 E00053953 [1, 2] 17.310829 \n", @@ -405,19 +405,19 @@ "3 31.0 3422.0 1 32857.859375 14.360952 E02002183_0003 \n", "4 62.0 7214.0 1 18162.451172 9.439944 E02002183_0003 \n", "\n", - " accommodation_type communal_type num_rooms central_heat tenure \\\n", - "0 1.0 NaN 2.0 True 2.0 \n", - "1 3.0 NaN 6.0 True 2.0 \n", - "2 3.0 NaN 6.0 True 2.0 \n", - "3 3.0 NaN 6.0 True 2.0 \n", - "4 3.0 NaN 6.0 True 2.0 \n", + " nssec8 accommodation_type communal_type num_rooms central_heat tenure \\\n", + "0 1.0 1.0 NaN 2.0 True 2.0 \n", + "1 1.0 3.0 NaN 6.0 True 2.0 \n", + "2 1.0 3.0 NaN 6.0 True 2.0 \n", + "3 4.0 3.0 NaN 6.0 True 2.0 \n", + "4 4.0 3.0 NaN 6.0 True 2.0 \n", "\n", - " num_cars sex age_years ethnicity nssec8 \n", - "0 2 1 86 1 1.0 \n", - "1 2 1 74 3 1.0 \n", - "2 2 2 68 1 2.0 \n", - "3 1 1 27 1 4.0 \n", - "4 1 2 26 1 6.0 " + " num_cars sex age_years ethnicity nssec8_household \n", + "0 2 1 86 1 1.0 \n", + "1 2 1 74 3 1.0 \n", + "2 2 2 68 1 2.0 \n", + "3 1 1 27 1 4.0 \n", + "4 1 2 26 1 6.0 " ] }, "execution_count": 3, @@ -438,8 +438,8 @@ "outputs": [], "source": [ "# select columns\n", - "spc = spc[['id', 'household', 'location', 'pid_hs',\n", - " 'msoa', 'oa', 
'members', 'sic1d2007', 'sic2d2007',\n", + "spc = spc[['id', 'household', 'pid_hs',\n", + " 'msoa11cd', 'oa11cd', 'members', 'sic1d2007', 'sic2d2007',\n", " 'pwkstat', 'salary_yearly', 'salary_hourly', 'hid',\n", " 'accommodation_type', 'communal_type', 'num_rooms', 'central_heat',\n", " 'tenure', 'num_cars', 'sex', 'age_years', 'ethnicity', 'nssec8']]" @@ -1513,10 +1513,9 @@ " \n", " id\n", " household\n", - " location\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " sic1d2007\n", " sic2d2007\n", @@ -1555,7 +1554,6 @@ " 0\n", " 0\n", " 0\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " 2905399\n", " E02002183\n", " E00053954\n", @@ -1595,7 +1593,6 @@ " 1\n", " 1\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2905308\n", " E02002183\n", " E00053953\n", @@ -1635,7 +1632,6 @@ " 2\n", " 2\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2907681\n", " E02002183\n", " E00053953\n", @@ -1655,7 +1651,7 @@ " 2\n", " 68\n", " 1\n", - " 2.0\n", + " 1.0\n", " 0.000000\n", " 1\n", " 1\n", @@ -1675,7 +1671,6 @@ " 3\n", " 3\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2902817\n", " E02002183\n", " E00053689\n", @@ -1715,7 +1710,6 @@ " 4\n", " 4\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2900884\n", " E02002183\n", " E00053689\n", @@ -1735,7 +1729,7 @@ " 2\n", " 26\n", " 1\n", - " 6.0\n", + " 4.0\n", " 51020.310547\n", " 3\n", " 1\n", @@ -1756,54 +1750,54 @@ "" ], "text/plain": [ - " id household location pid_hs \\\n", - "0 0 0 {'x': -1.7892179489135742, 'y': 53.91915130615... 2905399 \n", - "1 1 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2905308 \n", - "2 2 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2907681 \n", - "3 3 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 2902817 \n", - "4 4 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 
2900884 \n", + " id household pid_hs msoa11cd oa11cd members sic1d2007 sic2d2007 \\\n", + "0 0 0 2905399 E02002183 E00053954 [0] J 58.0 \n", + "1 1 1 2905308 E02002183 E00053953 [1, 2] C 25.0 \n", + "2 2 1 2907681 E02002183 E00053953 [1, 2] P 85.0 \n", + "3 3 2 2902817 E02002183 E00053689 [3, 4] C 31.0 \n", + "4 4 2 2900884 E02002183 E00053689 [3, 4] J 62.0 \n", "\n", - " msoa oa members sic1d2007 sic2d2007 pwkstat salary_yearly \\\n", - "0 E02002183 E00053954 [0] J 58.0 6 NaN \n", - "1 E02002183 E00053953 [1, 2] C 25.0 6 NaN \n", - "2 E02002183 E00053953 [1, 2] P 85.0 6 NaN \n", - "3 E02002183 E00053689 [3, 4] C 31.0 1 32857.859375 \n", - "4 E02002183 E00053689 [3, 4] J 62.0 1 18162.451172 \n", + " pwkstat salary_yearly salary_hourly hid accommodation_type \\\n", + "0 6 NaN NaN E02002183_0001 1.0 \n", + "1 6 NaN NaN E02002183_0002 3.0 \n", + "2 6 NaN NaN E02002183_0002 3.0 \n", + "3 1 32857.859375 14.360952 E02002183_0003 3.0 \n", + "4 1 18162.451172 9.439944 E02002183_0003 3.0 \n", "\n", - " salary_hourly hid accommodation_type communal_type \\\n", - "0 NaN E02002183_0001 1.0 NaN \n", - "1 NaN E02002183_0002 3.0 NaN \n", - "2 NaN E02002183_0002 3.0 NaN \n", - "3 14.360952 E02002183_0003 3.0 NaN \n", - "4 9.439944 E02002183_0003 3.0 NaN \n", + " communal_type num_rooms central_heat tenure num_cars sex age_years \\\n", + "0 NaN 2.0 True 2.0 2 1 86 \n", + "1 NaN 6.0 True 2.0 2 1 74 \n", + "2 NaN 6.0 True 2.0 2 2 68 \n", + "3 NaN 6.0 True 2.0 1 1 27 \n", + "4 NaN 6.0 True 2.0 1 2 26 \n", "\n", - " num_rooms central_heat tenure num_cars sex age_years ethnicity \\\n", - "0 2.0 True 2.0 2 1 86 1 \n", - "1 6.0 True 2.0 2 1 74 3 \n", - "2 6.0 True 2.0 2 2 68 1 \n", - "3 6.0 True 2.0 1 1 27 1 \n", - "4 6.0 True 2.0 1 2 26 1 \n", + " ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult \\\n", + "0 1 1.0 0.000000 1 1 \n", + "1 3 1.0 0.000000 1 1 \n", + "2 1 1.0 0.000000 1 1 \n", + "3 1 4.0 51020.310547 3 1 \n", + "4 1 4.0 51020.310547 3 1 \n", "\n", - " nssec8 
salary_yearly_hh salary_yearly_hh_cat is_adult num_adults \\\n", - "0 1.0 0.000000 1 1 1 \n", - "1 1.0 0.000000 1 1 2 \n", - "2 2.0 0.000000 1 1 2 \n", - "3 4.0 51020.310547 3 1 2 \n", - "4 6.0 51020.310547 3 1 2 \n", + " num_adults is_child num_children is_pension_age num_pension_age \\\n", + "0 1 0 0 1 1 \n", + "1 2 0 0 1 2 \n", + "2 2 0 0 1 2 \n", + "3 2 0 0 0 0 \n", + "4 2 0 0 0 0 \n", "\n", - " is_child num_children is_pension_age num_pension_age pwkstat_FT_hh \\\n", - "0 0 0 1 1 0 \n", - "1 0 0 1 2 0 \n", - "2 0 0 1 2 0 \n", - "3 0 0 0 0 2 \n", - "4 0 0 0 0 2 \n", + " pwkstat_FT_hh pwkstat_PT_hh pwkstat_NTS_match OA11CD \\\n", + "0 0 0 1 E00053954 \n", + "1 0 0 1 E00053953 \n", + "2 0 0 1 E00053953 \n", + "3 2 0 6 E00053689 \n", + "4 2 0 6 E00053689 \n", "\n", - " pwkstat_PT_hh pwkstat_NTS_match OA11CD RUC11 RUC11CD \n", - "0 0 1 E00053954 Urban city and town C1 \n", - "1 0 1 E00053953 Urban city and town C1 \n", - "2 0 1 E00053953 Urban city and town C1 \n", - "3 0 6 E00053689 Rural town and fringe D1 \n", - "4 0 6 E00053689 Rural town and fringe D1 " + " RUC11 RUC11CD \n", + "0 Urban city and town C1 \n", + "1 Urban city and town C1 \n", + "2 Urban city and town C1 \n", + "3 Rural town and fringe D1 \n", + "4 Rural town and fringe D1 " ] }, "execution_count": 23, @@ -1816,7 +1810,7 @@ "rural_urban = pd.read_csv('../data/external/census_2011_rural_urban.csv', sep=',')\n", "\n", "# merge the rural_urban data with the spc\n", - "spc_edited = spc_edited.merge(rural_urban[['OA11CD', 'RUC11', 'RUC11CD']], left_on='oa', right_on='OA11CD')\n", + "spc_edited = spc_edited.merge(rural_urban[['OA11CD', 'RUC11', 'RUC11CD']], left_on='oa11cd', right_on='OA11CD')\n", "spc_edited.head(5)\n", "\n" ] @@ -1902,10 +1896,9 @@ " \n", " id\n", " household\n", - " location\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " sic1d2007\n", " sic2d2007\n", @@ -1948,7 +1941,6 @@ " 0\n", " 0\n", " 0\n", - " {'x': -1.7892179489135742, 'y': 
53.91915130615...\n", " 2905399\n", " E02002183\n", " E00053954\n", @@ -1992,7 +1984,6 @@ " 1\n", " 1\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2905308\n", " E02002183\n", " E00053953\n", @@ -2036,7 +2027,6 @@ " 2\n", " 2\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2907681\n", " E02002183\n", " E00053953\n", @@ -2056,7 +2046,7 @@ " 2\n", " 68\n", " 1\n", - " 2.0\n", + " 1.0\n", " 0.000000\n", " 1\n", " 1\n", @@ -2080,7 +2070,6 @@ " 3\n", " 3\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2902817\n", " E02002183\n", " E00053689\n", @@ -2124,7 +2113,6 @@ " 4\n", " 4\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2900884\n", " E02002183\n", " E00053689\n", @@ -2144,7 +2132,7 @@ " 2\n", " 26\n", " 1\n", - " 6.0\n", + " 4.0\n", " 51020.310547\n", " 3\n", " 1\n", @@ -2169,68 +2157,68 @@ "" ], "text/plain": [ - " id household location pid_hs \\\n", - "0 0 0 {'x': -1.7892179489135742, 'y': 53.91915130615... 2905399 \n", - "1 1 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2905308 \n", - "2 2 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2907681 \n", - "3 3 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 2902817 \n", - "4 4 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 
2900884 \n", + " id household pid_hs msoa11cd oa11cd members sic1d2007 sic2d2007 \\\n", + "0 0 0 2905399 E02002183 E00053954 [0] J 58.0 \n", + "1 1 1 2905308 E02002183 E00053953 [1, 2] C 25.0 \n", + "2 2 1 2907681 E02002183 E00053953 [1, 2] P 85.0 \n", + "3 3 2 2902817 E02002183 E00053689 [3, 4] C 31.0 \n", + "4 4 2 2900884 E02002183 E00053689 [3, 4] J 62.0 \n", "\n", - " msoa oa members sic1d2007 sic2d2007 pwkstat salary_yearly \\\n", - "0 E02002183 E00053954 [0] J 58.0 6 NaN \n", - "1 E02002183 E00053953 [1, 2] C 25.0 6 NaN \n", - "2 E02002183 E00053953 [1, 2] P 85.0 6 NaN \n", - "3 E02002183 E00053689 [3, 4] C 31.0 1 32857.859375 \n", - "4 E02002183 E00053689 [3, 4] J 62.0 1 18162.451172 \n", + " pwkstat salary_yearly salary_hourly hid accommodation_type \\\n", + "0 6 NaN NaN E02002183_0001 1.0 \n", + "1 6 NaN NaN E02002183_0002 3.0 \n", + "2 6 NaN NaN E02002183_0002 3.0 \n", + "3 1 32857.859375 14.360952 E02002183_0003 3.0 \n", + "4 1 18162.451172 9.439944 E02002183_0003 3.0 \n", "\n", - " salary_hourly hid accommodation_type communal_type \\\n", - "0 NaN E02002183_0001 1.0 NaN \n", - "1 NaN E02002183_0002 3.0 NaN \n", - "2 NaN E02002183_0002 3.0 NaN \n", - "3 14.360952 E02002183_0003 3.0 NaN \n", - "4 9.439944 E02002183_0003 3.0 NaN \n", + " communal_type num_rooms central_heat tenure num_cars sex age_years \\\n", + "0 NaN 2.0 True 2.0 2 1 86 \n", + "1 NaN 6.0 True 2.0 2 1 74 \n", + "2 NaN 6.0 True 2.0 2 2 68 \n", + "3 NaN 6.0 True 2.0 1 1 27 \n", + "4 NaN 6.0 True 2.0 1 2 26 \n", "\n", - " num_rooms central_heat tenure num_cars sex age_years ethnicity \\\n", - "0 2.0 True 2.0 2 1 86 1 \n", - "1 6.0 True 2.0 2 1 74 3 \n", - "2 6.0 True 2.0 2 2 68 1 \n", - "3 6.0 True 2.0 1 1 27 1 \n", - "4 6.0 True 2.0 1 2 26 1 \n", + " ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult \\\n", + "0 1 1.0 0.000000 1 1 \n", + "1 3 1.0 0.000000 1 1 \n", + "2 1 1.0 0.000000 1 1 \n", + "3 1 4.0 51020.310547 3 1 \n", + "4 1 4.0 51020.310547 3 1 \n", "\n", - " nssec8 
salary_yearly_hh salary_yearly_hh_cat is_adult num_adults \\\n", - "0 1.0 0.000000 1 1 1 \n", - "1 1.0 0.000000 1 1 2 \n", - "2 2.0 0.000000 1 1 2 \n", - "3 4.0 51020.310547 3 1 2 \n", - "4 6.0 51020.310547 3 1 2 \n", + " num_adults is_child num_children is_pension_age num_pension_age \\\n", + "0 1 0 0 1 1 \n", + "1 2 0 0 1 2 \n", + "2 2 0 0 1 2 \n", + "3 2 0 0 0 0 \n", + "4 2 0 0 0 0 \n", "\n", - " is_child num_children is_pension_age num_pension_age pwkstat_FT_hh \\\n", - "0 0 0 1 1 0 \n", - "1 0 0 1 2 0 \n", - "2 0 0 1 2 0 \n", - "3 0 0 0 0 2 \n", - "4 0 0 0 0 2 \n", + " pwkstat_FT_hh pwkstat_PT_hh pwkstat_NTS_match OA11CD \\\n", + "0 0 0 1 E00053954 \n", + "1 0 0 1 E00053953 \n", + "2 0 0 1 E00053953 \n", + "3 2 0 6 E00053689 \n", + "4 2 0 6 E00053689 \n", "\n", - " pwkstat_PT_hh pwkstat_NTS_match OA11CD RUC11 RUC11CD \\\n", - "0 0 1 E00053954 Urban city and town C1 \n", - "1 0 1 E00053953 Urban city and town C1 \n", - "2 0 1 E00053953 Urban city and town C1 \n", - "3 0 6 E00053689 Rural town and fringe D1 \n", - "4 0 6 E00053689 Rural town and fringe D1 \n", + " RUC11 RUC11CD Settlement2011EW_B03ID_spc \\\n", + "0 Urban city and town C1 Urban \n", + "1 Urban city and town C1 Urban \n", + "2 Urban city and town C1 Urban \n", + "3 Rural town and fringe D1 Rural \n", + "4 Rural town and fringe D1 Rural \n", "\n", - " Settlement2011EW_B03ID_spc Settlement2011EW_B04ID_spc \\\n", - "0 Urban Urban City and Town \n", - "1 Urban Urban City and Town \n", - "2 Urban Urban City and Town \n", - "3 Rural Rural Town and Fringe \n", - "4 Rural Rural Town and Fringe \n", + " Settlement2011EW_B04ID_spc Settlement2011EW_B03ID_spc_CD \\\n", + "0 Urban City and Town 1 \n", + "1 Urban City and Town 1 \n", + "2 Urban City and Town 1 \n", + "3 Rural Town and Fringe 2 \n", + "4 Rural Town and Fringe 2 \n", "\n", - " Settlement2011EW_B03ID_spc_CD Settlement2011EW_B04ID_spc_CD \n", - "0 1 2 \n", - "1 1 2 \n", - "2 1 2 \n", - "3 2 3 \n", - "4 2 3 " + " Settlement2011EW_B04ID_spc_CD \n", 
+ "0 2 \n", + "1 2 \n", + "2 2 \n", + "3 3 \n", + "4 3 " ] }, "execution_count": 25, @@ -3301,10 +3289,9 @@ " \n", " id\n", " household\n", - " location\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " sic1d2007\n", " sic2d2007\n", @@ -3349,7 +3336,6 @@ " 0\n", " 0\n", " 0\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " 2905399\n", " E02002183\n", " E00053954\n", @@ -3395,7 +3381,6 @@ " 1\n", " 1\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2905308\n", " E02002183\n", " E00053953\n", @@ -3441,7 +3426,6 @@ " 2\n", " 2\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2907681\n", " E02002183\n", " E00053953\n", @@ -3461,7 +3445,7 @@ " 2\n", " 68\n", " 1\n", - " 2.0\n", + " 1.0\n", " 0.000000\n", " 1\n", " 1\n", @@ -3487,7 +3471,6 @@ " 3\n", " 3\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2902817\n", " E02002183\n", " E00053689\n", @@ -3533,7 +3516,6 @@ " 4\n", " 4\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2900884\n", " E02002183\n", " E00053689\n", @@ -3553,7 +3535,7 @@ " 2\n", " 26\n", " 1\n", - " 6.0\n", + " 4.0\n", " 51020.310547\n", " 3\n", " 1\n", @@ -3580,75 +3562,75 @@ "" ], "text/plain": [ - " id household location pid_hs \\\n", - "0 0 0 {'x': -1.7892179489135742, 'y': 53.91915130615... 2905399 \n", - "1 1 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2905308 \n", - "2 2 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2907681 \n", - "3 3 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 2902817 \n", - "4 4 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 
2900884 \n", + " id household pid_hs msoa11cd oa11cd members sic1d2007 sic2d2007 \\\n", + "0 0 0 2905399 E02002183 E00053954 [0] J 58.0 \n", + "1 1 1 2905308 E02002183 E00053953 [1, 2] C 25.0 \n", + "2 2 1 2907681 E02002183 E00053953 [1, 2] P 85.0 \n", + "3 3 2 2902817 E02002183 E00053689 [3, 4] C 31.0 \n", + "4 4 2 2900884 E02002183 E00053689 [3, 4] J 62.0 \n", "\n", - " msoa oa members sic1d2007 sic2d2007 pwkstat salary_yearly \\\n", - "0 E02002183 E00053954 [0] J 58.0 6 NaN \n", - "1 E02002183 E00053953 [1, 2] C 25.0 6 NaN \n", - "2 E02002183 E00053953 [1, 2] P 85.0 6 NaN \n", - "3 E02002183 E00053689 [3, 4] C 31.0 1 32857.859375 \n", - "4 E02002183 E00053689 [3, 4] J 62.0 1 18162.451172 \n", + " pwkstat salary_yearly salary_hourly hid accommodation_type \\\n", + "0 6 NaN NaN E02002183_0001 1.0 \n", + "1 6 NaN NaN E02002183_0002 3.0 \n", + "2 6 NaN NaN E02002183_0002 3.0 \n", + "3 1 32857.859375 14.360952 E02002183_0003 3.0 \n", + "4 1 18162.451172 9.439944 E02002183_0003 3.0 \n", "\n", - " salary_hourly hid accommodation_type communal_type \\\n", - "0 NaN E02002183_0001 1.0 NaN \n", - "1 NaN E02002183_0002 3.0 NaN \n", - "2 NaN E02002183_0002 3.0 NaN \n", - "3 14.360952 E02002183_0003 3.0 NaN \n", - "4 9.439944 E02002183_0003 3.0 NaN \n", + " communal_type num_rooms central_heat tenure num_cars sex age_years \\\n", + "0 NaN 2.0 True 2.0 2 1 86 \n", + "1 NaN 6.0 True 2.0 2 1 74 \n", + "2 NaN 6.0 True 2.0 2 2 68 \n", + "3 NaN 6.0 True 2.0 1 1 27 \n", + "4 NaN 6.0 True 2.0 1 2 26 \n", "\n", - " num_rooms central_heat tenure num_cars sex age_years ethnicity \\\n", - "0 2.0 True 2.0 2 1 86 1 \n", - "1 6.0 True 2.0 2 1 74 3 \n", - "2 6.0 True 2.0 2 2 68 1 \n", - "3 6.0 True 2.0 1 1 27 1 \n", - "4 6.0 True 2.0 1 2 26 1 \n", + " ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult \\\n", + "0 1 1.0 0.000000 1 1 \n", + "1 3 1.0 0.000000 1 1 \n", + "2 1 1.0 0.000000 1 1 \n", + "3 1 4.0 51020.310547 3 1 \n", + "4 1 4.0 51020.310547 3 1 \n", "\n", - " nssec8 
salary_yearly_hh salary_yearly_hh_cat is_adult num_adults \\\n", - "0 1.0 0.000000 1 1 1 \n", - "1 1.0 0.000000 1 1 2 \n", - "2 2.0 0.000000 1 1 2 \n", - "3 4.0 51020.310547 3 1 2 \n", - "4 6.0 51020.310547 3 1 2 \n", + " num_adults is_child num_children is_pension_age num_pension_age \\\n", + "0 1 0 0 1 1 \n", + "1 2 0 0 1 2 \n", + "2 2 0 0 1 2 \n", + "3 2 0 0 0 0 \n", + "4 2 0 0 0 0 \n", "\n", - " is_child num_children is_pension_age num_pension_age pwkstat_FT_hh \\\n", - "0 0 0 1 1 0 \n", - "1 0 0 1 2 0 \n", - "2 0 0 1 2 0 \n", - "3 0 0 0 0 2 \n", - "4 0 0 0 0 2 \n", + " pwkstat_FT_hh pwkstat_PT_hh pwkstat_NTS_match OA11CD \\\n", + "0 0 0 1 E00053954 \n", + "1 0 0 1 E00053953 \n", + "2 0 0 1 E00053953 \n", + "3 2 0 6 E00053689 \n", + "4 2 0 6 E00053689 \n", "\n", - " pwkstat_PT_hh pwkstat_NTS_match OA11CD RUC11 RUC11CD \\\n", - "0 0 1 E00053954 Urban city and town C1 \n", - "1 0 1 E00053953 Urban city and town C1 \n", - "2 0 1 E00053953 Urban city and town C1 \n", - "3 0 6 E00053689 Rural town and fringe D1 \n", - "4 0 6 E00053689 Rural town and fringe D1 \n", + " RUC11 RUC11CD Settlement2011EW_B03ID_spc \\\n", + "0 Urban city and town C1 Urban \n", + "1 Urban city and town C1 Urban \n", + "2 Urban city and town C1 Urban \n", + "3 Rural town and fringe D1 Rural \n", + "4 Rural town and fringe D1 Rural \n", "\n", - " Settlement2011EW_B03ID_spc Settlement2011EW_B04ID_spc \\\n", - "0 Urban Urban City and Town \n", - "1 Urban Urban City and Town \n", - "2 Urban Urban City and Town \n", - "3 Rural Rural Town and Fringe \n", - "4 Rural Rural Town and Fringe \n", + " Settlement2011EW_B04ID_spc Settlement2011EW_B03ID_spc_CD \\\n", + "0 Urban City and Town 1 \n", + "1 Urban City and Town 1 \n", + "2 Urban City and Town 1 \n", + "3 Rural Town and Fringe 2 \n", + "4 Rural Town and Fringe 2 \n", "\n", - " Settlement2011EW_B03ID_spc_CD Settlement2011EW_B04ID_spc_CD \\\n", - "0 1 2 \n", - "1 1 2 \n", - "2 1 2 \n", - "3 2 3 \n", - "4 2 3 \n", + " Settlement2011EW_B04ID_spc_CD 
tenure_spc_for_matching \\\n", + "0 2 1.0 \n", + "1 2 1.0 \n", + "2 2 1.0 \n", + "3 3 1.0 \n", + "4 3 1.0 \n", "\n", - " tenure_spc_for_matching nts_hh_id \n", - "0 1.0 [2019004064.0, 2019000229.0, 2019002914.0, 201... \n", - "1 1.0 [2019004130.0, 2019004126.0, 2019004144.0, 201... \n", - "2 1.0 [2019004130.0, 2019004126.0, 2019004144.0, 201... \n", - "3 1.0 [2019001923.0, 2019003253.0, 2019001755.0, 201... \n", - "4 1.0 [2019001923.0, 2019003253.0, 2019001755.0, 201... " + " nts_hh_id \n", + "0 [2019004064.0, 2019000229.0, 2019002914.0, 201... \n", + "1 [2019004130.0, 2019004126.0, 2019004144.0, 201... \n", + "2 [2019004130.0, 2019004126.0, 2019004144.0, 201... \n", + "3 [2019001923.0, 2019003253.0, 2019001755.0, 201... \n", + "4 [2019001923.0, 2019003253.0, 2019001755.0, 201... " ] }, "execution_count": 39, @@ -3704,7 +3686,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "('E02002183_0595', 2019003190.0)\n" + "('E02002183_0595', 2019005260.0)\n" ] } ], @@ -3740,16 +3722,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "# random sample\n", - "with open('../data/interim/matching/matches_hh_level_categorical_random_sample.pkl', 'wb') as f:\n", + "with open(get_interim_path(\"matches_hh_level_categorical_random_sample.pkl\"), 'wb') as f:\n", " pkl.dump(matches_hh_level_sample, f)\n", "\n", "# multiple random samples\n", - "with open('../data/interim/matching/matches_hh_level_categorical_random_sample_multiple.pkl', 'wb') as f:\n", + "with open(get_interim_path('matches_hh_level_categorical_random_sample_multiple.pkl'), 'wb') as f:\n", " pkl.dump(matches_hh_level_sample_list, f)" ] }, @@ -3762,7 +3744,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -3789,7 +3771,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -4066,7 +4048,7 @@ 
"340876 -9 -9 -9 -10 " ] }, - "execution_count": 43, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -4084,7 +4066,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -4116,7 +4098,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -4134,7 +4116,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -4145,32 +4127,32 @@ " df1_id = 'hid',\n", " df2_id = 'HouseholdID',\n", " matches_hh = matches_hh_level_sample,\n", - " show_progress = True)\n", + " show_progress = False)\n", "\n", "#matches_ind" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{0: 349184,\n", - " 1: 368506,\n", - " 2: 368505,\n", - " 3: 355607,\n", - " 4: 355606,\n", - " 5: 344330,\n", - " 9: 352777,\n", + "{0: 380108,\n", + " 1: 383560,\n", + " 2: 383561,\n", + " 3: 375552,\n", + " 4: 375553,\n", + " 5: 374637,\n", + " 9: 355974,\n", " 10: 354879,\n", " 11: 354878,\n", " 12: 354880}" ] }, - "execution_count": 47, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -4184,7 +4166,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -4197,7 +4179,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -4223,10 +4205,9 @@ " \n", " id\n", " household\n", - " location\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " sic1d2007\n", " sic2d2007\n", @@ -4273,7 +4254,6 @@ " 0\n", " 0\n", " 0\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " 2905399\n", " E02002183\n", " E00053954\n", @@ -4315,13 +4295,12 @@ " 1.0\n", " [2019004064.0, 2019000229.0, 2019002914.0, 
201...\n", " 9\n", - " 2.019009e+09\n", + " 2.022009e+09\n", " \n", " \n", " 1\n", " 1\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2905308\n", " E02002183\n", " E00053953\n", @@ -4363,13 +4342,12 @@ " 1.0\n", " [2019004130.0, 2019004126.0, 2019004144.0, 201...\n", " 9\n", - " 2.021011e+09\n", + " 2.022009e+09\n", " \n", " \n", " 2\n", " 2\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2907681\n", " E02002183\n", " E00053953\n", @@ -4389,7 +4367,7 @@ " 2\n", " 68\n", " 1\n", - " 2.0\n", + " 1.0\n", " 0.000000\n", " 1\n", " 1\n", @@ -4411,13 +4389,12 @@ " 1.0\n", " [2019004130.0, 2019004126.0, 2019004144.0, 201...\n", " 9\n", - " 2.021011e+09\n", + " 2.022009e+09\n", " \n", " \n", " 3\n", " 3\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2902817\n", " E02002183\n", " E00053689\n", @@ -4459,13 +4436,12 @@ " 1.0\n", " [2019001923.0, 2019003253.0, 2019001755.0, 201...\n", " 5\n", - " 2.019010e+09\n", + " 2.022002e+09\n", " \n", " \n", " 4\n", " 4\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2900884\n", " E02002183\n", " E00053689\n", @@ -4485,7 +4461,7 @@ " 2\n", " 26\n", " 1\n", - " 6.0\n", + " 4.0\n", " 51020.310547\n", " 3\n", " 1\n", @@ -4507,92 +4483,85 @@ " 1.0\n", " [2019001923.0, 2019003253.0, 2019001755.0, 201...\n", " 5\n", - " 2.019010e+09\n", + " 2.022002e+09\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id household location pid_hs \\\n", - "0 0 0 {'x': -1.7892179489135742, 'y': 53.91915130615... 2905399 \n", - "1 1 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2905308 \n", - "2 2 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2907681 \n", - "3 3 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 2902817 \n", - "4 4 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 
2900884 \n", - "\n", - " msoa oa members sic1d2007 sic2d2007 pwkstat salary_yearly \\\n", - "0 E02002183 E00053954 [0] J 58.0 6 NaN \n", - "1 E02002183 E00053953 [1, 2] C 25.0 6 NaN \n", - "2 E02002183 E00053953 [1, 2] P 85.0 6 NaN \n", - "3 E02002183 E00053689 [3, 4] C 31.0 1 32857.859375 \n", - "4 E02002183 E00053689 [3, 4] J 62.0 1 18162.451172 \n", + " id household pid_hs msoa11cd oa11cd members sic1d2007 sic2d2007 \\\n", + "0 0 0 2905399 E02002183 E00053954 [0] J 58.0 \n", + "1 1 1 2905308 E02002183 E00053953 [1, 2] C 25.0 \n", + "2 2 1 2907681 E02002183 E00053953 [1, 2] P 85.0 \n", + "3 3 2 2902817 E02002183 E00053689 [3, 4] C 31.0 \n", + "4 4 2 2900884 E02002183 E00053689 [3, 4] J 62.0 \n", "\n", - " salary_hourly hid accommodation_type communal_type \\\n", - "0 NaN E02002183_0001 1.0 NaN \n", - "1 NaN E02002183_0002 3.0 NaN \n", - "2 NaN E02002183_0002 3.0 NaN \n", - "3 14.360952 E02002183_0003 3.0 NaN \n", - "4 9.439944 E02002183_0003 3.0 NaN \n", + " pwkstat salary_yearly salary_hourly hid accommodation_type \\\n", + "0 6 NaN NaN E02002183_0001 1.0 \n", + "1 6 NaN NaN E02002183_0002 3.0 \n", + "2 6 NaN NaN E02002183_0002 3.0 \n", + "3 1 32857.859375 14.360952 E02002183_0003 3.0 \n", + "4 1 18162.451172 9.439944 E02002183_0003 3.0 \n", "\n", - " num_rooms central_heat tenure num_cars sex age_years ethnicity \\\n", - "0 2.0 True 2.0 2 1 86 1 \n", - "1 6.0 True 2.0 2 1 74 3 \n", - "2 6.0 True 2.0 2 2 68 1 \n", - "3 6.0 True 2.0 1 1 27 1 \n", - "4 6.0 True 2.0 1 2 26 1 \n", + " communal_type num_rooms central_heat tenure num_cars sex age_years \\\n", + "0 NaN 2.0 True 2.0 2 1 86 \n", + "1 NaN 6.0 True 2.0 2 1 74 \n", + "2 NaN 6.0 True 2.0 2 2 68 \n", + "3 NaN 6.0 True 2.0 1 1 27 \n", + "4 NaN 6.0 True 2.0 1 2 26 \n", "\n", - " nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult num_adults \\\n", - "0 1.0 0.000000 1 1 1 \n", - "1 1.0 0.000000 1 1 2 \n", - "2 2.0 0.000000 1 1 2 \n", - "3 4.0 51020.310547 3 1 2 \n", - "4 6.0 51020.310547 3 1 2 \n", + " 
ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult \\\n", + "0 1 1.0 0.000000 1 1 \n", + "1 3 1.0 0.000000 1 1 \n", + "2 1 1.0 0.000000 1 1 \n", + "3 1 4.0 51020.310547 3 1 \n", + "4 1 4.0 51020.310547 3 1 \n", "\n", - " is_child num_children is_pension_age num_pension_age pwkstat_FT_hh \\\n", - "0 0 0 1 1 0 \n", - "1 0 0 1 2 0 \n", - "2 0 0 1 2 0 \n", - "3 0 0 0 0 2 \n", - "4 0 0 0 0 2 \n", + " num_adults is_child num_children is_pension_age num_pension_age \\\n", + "0 1 0 0 1 1 \n", + "1 2 0 0 1 2 \n", + "2 2 0 0 1 2 \n", + "3 2 0 0 0 0 \n", + "4 2 0 0 0 0 \n", "\n", - " pwkstat_PT_hh pwkstat_NTS_match OA11CD RUC11 RUC11CD \\\n", - "0 0 1 E00053954 Urban city and town C1 \n", - "1 0 1 E00053953 Urban city and town C1 \n", - "2 0 1 E00053953 Urban city and town C1 \n", - "3 0 6 E00053689 Rural town and fringe D1 \n", - "4 0 6 E00053689 Rural town and fringe D1 \n", + " pwkstat_FT_hh pwkstat_PT_hh pwkstat_NTS_match OA11CD \\\n", + "0 0 0 1 E00053954 \n", + "1 0 0 1 E00053953 \n", + "2 0 0 1 E00053953 \n", + "3 2 0 6 E00053689 \n", + "4 2 0 6 E00053689 \n", "\n", - " Settlement2011EW_B03ID_spc Settlement2011EW_B04ID_spc \\\n", - "0 Urban Urban City and Town \n", - "1 Urban Urban City and Town \n", - "2 Urban Urban City and Town \n", - "3 Rural Rural Town and Fringe \n", - "4 Rural Rural Town and Fringe \n", + " RUC11 RUC11CD Settlement2011EW_B03ID_spc \\\n", + "0 Urban city and town C1 Urban \n", + "1 Urban city and town C1 Urban \n", + "2 Urban city and town C1 Urban \n", + "3 Rural town and fringe D1 Rural \n", + "4 Rural town and fringe D1 Rural \n", "\n", - " Settlement2011EW_B03ID_spc_CD Settlement2011EW_B04ID_spc_CD \\\n", - "0 1 2 \n", - "1 1 2 \n", - "2 1 2 \n", - "3 2 3 \n", - "4 2 3 \n", + " Settlement2011EW_B04ID_spc Settlement2011EW_B03ID_spc_CD \\\n", + "0 Urban City and Town 1 \n", + "1 Urban City and Town 1 \n", + "2 Urban City and Town 1 \n", + "3 Rural Town and Fringe 2 \n", + "4 Rural Town and Fringe 2 \n", "\n", - " 
tenure_spc_for_matching nts_hh_id \\\n", - "0 1.0 [2019004064.0, 2019000229.0, 2019002914.0, 201... \n", - "1 1.0 [2019004130.0, 2019004126.0, 2019004144.0, 201... \n", - "2 1.0 [2019004130.0, 2019004126.0, 2019004144.0, 201... \n", - "3 1.0 [2019001923.0, 2019003253.0, 2019001755.0, 201... \n", - "4 1.0 [2019001923.0, 2019003253.0, 2019001755.0, 201... \n", + " Settlement2011EW_B04ID_spc_CD tenure_spc_for_matching \\\n", + "0 2 1.0 \n", + "1 2 1.0 \n", + "2 2 1.0 \n", + "3 3 1.0 \n", + "4 3 1.0 \n", "\n", - " age_group nts_ind_id \n", - "0 9 2.019009e+09 \n", - "1 9 2.021011e+09 \n", - "2 9 2.021011e+09 \n", - "3 5 2.019010e+09 \n", - "4 5 2.019010e+09 " + " nts_hh_id age_group nts_ind_id \n", + "0 [2019004064.0, 2019000229.0, 2019002914.0, 201... 9 2.022009e+09 \n", + "1 [2019004130.0, 2019004126.0, 2019004144.0, 201... 9 2.022009e+09 \n", + "2 [2019004130.0, 2019004126.0, 2019004144.0, 201... 9 2.022009e+09 \n", + "3 [2019001923.0, 2019003253.0, 2019001755.0, 201... 5 2.022002e+09 \n", + "4 [2019001923.0, 2019003253.0, 2019001755.0, 201... 
5 2.022002e+09 " ] }, - "execution_count": 49, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -4610,7 +4579,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -4636,10 +4605,9 @@ " \n", " id\n", " household\n", - " location\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " sic1d2007\n", " sic2d2007\n", @@ -4686,7 +4654,6 @@ " 117\n", " 117\n", " 61\n", - " {'x': -1.887002944946289, 'y': 53.944278717041...\n", " 2904126\n", " E02002183\n", " E00053688\n", @@ -4706,7 +4673,7 @@ " 1\n", " 71\n", " 1\n", - " 1.0\n", + " 7.0\n", " 5020.788086\n", " 1\n", " 1\n", @@ -4728,13 +4695,12 @@ " 2.0\n", " [2019000929.0, 2019003194.0, 2019003199.0, 201...\n", " 9\n", - " 2022001198.0\n", + " 2021004862.0\n", " \n", " \n", " 118\n", " 118\n", " 62\n", - " {'x': -1.8956769704818726, 'y': 53.94247055053...\n", " 2908490\n", " E02002183\n", " E00053691\n", @@ -4776,13 +4742,12 @@ " 1.0\n", " [2019001923.0, 2019003253.0, 2019001755.0, 201...\n", " 6\n", - " 2019007422.0\n", + " 2019007041.0\n", " \n", " \n", " 119\n", " 119\n", " 62\n", - " {'x': -1.8956769704818726, 'y': 53.94247055053...\n", " 2911873\n", " E02002183\n", " E00053691\n", @@ -4802,7 +4767,7 @@ " 2\n", " 29\n", " 1\n", - " 2.0\n", + " 4.0\n", " 18557.246094\n", " 1\n", " 1\n", @@ -4824,13 +4789,12 @@ " 1.0\n", " [2019001923.0, 2019003253.0, 2019001755.0, 201...\n", " 5\n", - " 2019007423.0\n", + " 2019007042.0\n", " \n", " \n", " 120\n", " 120\n", " 63\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " 2911879\n", " E02002183\n", " E00053954\n", @@ -4872,13 +4836,12 @@ " 1.0\n", " [2019001902.0, 2019004101.0, 2019004092.0, 201...\n", " 7\n", - " 2022006066.0\n", + " 2019013602.0\n", " \n", " \n", " 121\n", " 121\n", " 63\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " 2904728\n", " E02002183\n", " E00053954\n", @@ -4898,7 +4861,7 @@ " 2\n", " 42\n", " 1\n", 
- " 5.0\n", + " 1.0\n", " 53864.953125\n", " 3\n", " 1\n", @@ -4920,13 +4883,12 @@ " 1.0\n", " [2019001902.0, 2019004101.0, 2019004092.0, 201...\n", " 7\n", - " 2022006067.0\n", + " 2019013603.0\n", " \n", " \n", " 122\n", " 122\n", " 64\n", - " {'x': -1.8792779445648193, 'y': 53.94593048095...\n", " 2910111\n", " E02002183\n", " E00053696\n", @@ -4968,60 +4930,52 @@ " 2.0\n", " [2019000933.0, 2019001918.0, 2019001705.0, 201...\n", " 8\n", - " 2022004957.0\n", + " 2019009052.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id household location \\\n", - "117 117 61 {'x': -1.887002944946289, 'y': 53.944278717041... \n", - "118 118 62 {'x': -1.8956769704818726, 'y': 53.94247055053... \n", - "119 119 62 {'x': -1.8956769704818726, 'y': 53.94247055053... \n", - "120 120 63 {'x': -1.7892179489135742, 'y': 53.91915130615... \n", - "121 121 63 {'x': -1.7892179489135742, 'y': 53.91915130615... \n", - "122 122 64 {'x': -1.8792779445648193, 'y': 53.94593048095... \n", - "\n", - " pid_hs msoa oa members sic1d2007 sic2d2007 pwkstat \\\n", - "117 2904126 E02002183 E00053688 [116, 117] G 47.0 10 \n", - "118 2908490 E02002183 E00053691 [118, 119] J 61.0 1 \n", - "119 2911873 E02002183 E00053691 [118, 119] M 70.0 4 \n", - "120 2911879 E02002183 E00053954 [120, 121] C 27.0 1 \n", - "121 2904728 E02002183 E00053954 [120, 121] C 14.0 4 \n", - "122 2910111 E02002183 E00053696 [122] M 71.0 1 \n", + " id household pid_hs msoa11cd oa11cd members sic1d2007 \\\n", + "117 117 61 2904126 E02002183 E00053688 [116, 117] G \n", + "118 118 62 2908490 E02002183 E00053691 [118, 119] J \n", + "119 119 62 2911873 E02002183 E00053691 [118, 119] M \n", + "120 120 63 2911879 E02002183 E00053954 [120, 121] C \n", + "121 121 63 2904728 E02002183 E00053954 [120, 121] C \n", + "122 122 64 2910111 E02002183 E00053696 [122] M \n", "\n", - " salary_yearly salary_hourly hid accommodation_type \\\n", - "117 NaN NaN E02002183_0062 3.0 \n", - "118 18557.246094 9.391318 E02002183_0063 1.0 \n", - "119 NaN NaN 
E02002183_0063 1.0 \n", - "120 53864.953125 25.896612 E02002183_0064 4.0 \n", - "121 NaN NaN E02002183_0064 4.0 \n", - "122 51522.851562 24.770601 E02002183_0065 2.0 \n", + " sic2d2007 pwkstat salary_yearly salary_hourly hid \\\n", + "117 47.0 10 NaN NaN E02002183_0062 \n", + "118 61.0 1 18557.246094 9.391318 E02002183_0063 \n", + "119 70.0 4 NaN NaN E02002183_0063 \n", + "120 27.0 1 53864.953125 25.896612 E02002183_0064 \n", + "121 14.0 4 NaN NaN E02002183_0064 \n", + "122 71.0 1 51522.851562 24.770601 E02002183_0065 \n", "\n", - " communal_type num_rooms central_heat tenure num_cars sex age_years \\\n", - "117 NaN 4.0 True 5.0 2 1 71 \n", - "118 NaN 5.0 True 1.0 1 1 30 \n", - "119 NaN 5.0 True 1.0 1 2 29 \n", - "120 NaN 6.0 True 1.0 2 1 42 \n", - "121 NaN 6.0 True 1.0 2 2 42 \n", - "122 NaN 3.0 True 4.0 1 1 53 \n", + " accommodation_type communal_type num_rooms central_heat tenure num_cars \\\n", + "117 3.0 NaN 4.0 True 5.0 2 \n", + "118 1.0 NaN 5.0 True 1.0 1 \n", + "119 1.0 NaN 5.0 True 1.0 1 \n", + "120 4.0 NaN 6.0 True 1.0 2 \n", + "121 4.0 NaN 6.0 True 1.0 2 \n", + "122 2.0 NaN 3.0 True 4.0 1 \n", "\n", - " ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult \\\n", - "117 1 1.0 5020.788086 1 1 \n", - "118 1 4.0 18557.246094 1 1 \n", - "119 1 2.0 18557.246094 1 1 \n", - "120 1 1.0 53864.953125 3 1 \n", - "121 1 5.0 53864.953125 3 1 \n", - "122 3 2.0 51522.851562 3 1 \n", + " sex age_years ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat \\\n", + "117 1 71 1 7.0 5020.788086 1 \n", + "118 1 30 1 4.0 18557.246094 1 \n", + "119 2 29 1 4.0 18557.246094 1 \n", + "120 1 42 1 1.0 53864.953125 3 \n", + "121 2 42 1 1.0 53864.953125 3 \n", + "122 1 53 3 2.0 51522.851562 3 \n", "\n", - " num_adults is_child num_children is_pension_age num_pension_age \\\n", - "117 2 0 0 1 2 \n", - "118 2 0 0 0 0 \n", - "119 2 0 0 0 0 \n", - "120 2 0 0 0 0 \n", - "121 2 0 0 0 0 \n", - "122 1 0 0 0 0 \n", + " is_adult num_adults is_child num_children is_pension_age 
num_pension_age \\\n", + "117 1 2 0 0 1 2 \n", + "118 1 2 0 0 0 0 \n", + "119 1 2 0 0 0 0 \n", + "120 1 2 0 0 0 0 \n", + "121 1 2 0 0 0 0 \n", + "122 1 1 0 0 0 0 \n", "\n", " pwkstat_FT_hh pwkstat_PT_hh pwkstat_NTS_match OA11CD \\\n", "117 0 1 2 E00053688 \n", @@ -5056,15 +5010,15 @@ "122 3 2.0 \n", "\n", " nts_hh_id age_group nts_ind_id \n", - "117 [2019000929.0, 2019003194.0, 2019003199.0, 201... 9 2022001198.0 \n", - "118 [2019001923.0, 2019003253.0, 2019001755.0, 201... 6 2019007422.0 \n", - "119 [2019001923.0, 2019003253.0, 2019001755.0, 201... 5 2019007423.0 \n", - "120 [2019001902.0, 2019004101.0, 2019004092.0, 201... 7 2022006066.0 \n", - "121 [2019001902.0, 2019004101.0, 2019004092.0, 201... 7 2022006067.0 \n", - "122 [2019000933.0, 2019001918.0, 2019001705.0, 201... 8 2022004957.0 " + "117 [2019000929.0, 2019003194.0, 2019003199.0, 201... 9 2021004862.0 \n", + "118 [2019001923.0, 2019003253.0, 2019001755.0, 201... 6 2019007041.0 \n", + "119 [2019001923.0, 2019003253.0, 2019001755.0, 201... 5 2019007042.0 \n", + "120 [2019001902.0, 2019004101.0, 2019004092.0, 201... 7 2019013602.0 \n", + "121 [2019001902.0, 2019004101.0, 2019004092.0, 201... 7 2019013603.0 \n", + "122 [2019000933.0, 2019001918.0, 2019001705.0, 201... 
8 2019009052.0 " ] }, - "execution_count": 50, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -5099,7 +5053,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 53, "metadata": {}, "outputs": [ { @@ -5163,7 +5117,7 @@ " 1\n", " 71\n", " 9\n", - " 1.0\n", + " 7.0\n", " 5020.788086\n", " 1\n", " 1\n", @@ -5217,7 +5171,7 @@ " 2\n", " 29\n", " 5\n", - " 2.0\n", + " 4.0\n", " 18557.246094\n", " 1\n", " 1\n", @@ -5271,7 +5225,7 @@ " 2\n", " 42\n", " 7\n", - " 5.0\n", + " 1.0\n", " 53864.953125\n", " 3\n", " 1\n", @@ -5326,11 +5280,11 @@ "122 122 64 1 51522.851562 24.770601 E02002183_0065 4.0 \n", "\n", " num_cars sex age_years age_group nssec8 salary_yearly_hh \\\n", - "117 2 1 71 9 1.0 5020.788086 \n", + "117 2 1 71 9 7.0 5020.788086 \n", "118 1 1 30 6 4.0 18557.246094 \n", - "119 1 2 29 5 2.0 18557.246094 \n", + "119 1 2 29 5 4.0 18557.246094 \n", "120 2 1 42 7 1.0 53864.953125 \n", - "121 2 2 42 7 5.0 53864.953125 \n", + "121 2 2 42 7 1.0 53864.953125 \n", "122 1 1 53 8 2.0 51522.851562 \n", "\n", " salary_yearly_hh_cat is_adult is_child is_pension_age pwkstat_FT_hh \\\n", @@ -5401,61 +5355,61 @@ " \n", " \n", " \n", - " 375894\n", - " 2.022001e+09\n", - " 2.022001e+09\n", - " 18.0\n", + " 371335\n", + " 2.021005e+09\n", + " 2.021002e+09\n", + " 19.0\n", " 9.0\n", " 1.0\n", " 1.0\n", - " 1.0\n", + " 2.0\n", " \n", " \n", - " 351919\n", + " 344976\n", " 2.019007e+09\n", " 2.019003e+09\n", - " 16.0\n", - " 9.0\n", + " 15.0\n", + " 8.0\n", " 1.0\n", " 2.0\n", " 1.0\n", " \n", " \n", - " 351920\n", + " 344977\n", " 2.019007e+09\n", " 2.019003e+09\n", - " 16.0\n", - " 9.0\n", - " 1.0\n", + " 15.0\n", + " 8.0\n", " 2.0\n", - " 3.0\n", + " 2.0\n", + " 1.0\n", " \n", " \n", - " 380675\n", - " 2.022006e+09\n", - " 2.022003e+09\n", - " 14.0\n", - " 7.0\n", + " 354031\n", + " 2.019014e+09\n", + " 2.019006e+09\n", + " 13.0\n", + " 6.0\n", " 1.0\n", " 2.0\n", - " 1.0\n", + " 2.0\n", " \n", " \n", - " 380676\n", - " 
2.022006e+09\n", - " 2.022003e+09\n", - " 14.0\n", - " 7.0\n", + " 354032\n", + " 2.019014e+09\n", + " 2.019006e+09\n", + " 13.0\n", + " 6.0\n", " 2.0\n", " 2.0\n", - " 3.0\n", + " 1.0\n", " \n", " \n", - " 378401\n", - " 2.022005e+09\n", - " 2.022002e+09\n", - " 17.0\n", - " 9.0\n", + " 354318\n", + " 2.019009e+09\n", + " 2.019004e+09\n", + " 13.0\n", + " 6.0\n", " 2.0\n", " 2.0\n", " 1.0\n", @@ -5466,20 +5420,20 @@ ], "text/plain": [ " IndividualID HouseholdID Age_B01ID age_group sex OfPenAge_B01ID \\\n", - "375894 2.022001e+09 2.022001e+09 18.0 9.0 1.0 1.0 \n", - "351919 2.019007e+09 2.019003e+09 16.0 9.0 1.0 2.0 \n", - "351920 2.019007e+09 2.019003e+09 16.0 9.0 1.0 2.0 \n", - "380675 2.022006e+09 2.022003e+09 14.0 7.0 1.0 2.0 \n", - "380676 2.022006e+09 2.022003e+09 14.0 7.0 2.0 2.0 \n", - "378401 2.022005e+09 2.022002e+09 17.0 9.0 2.0 2.0 \n", + "371335 2.021005e+09 2.021002e+09 19.0 9.0 1.0 1.0 \n", + "344976 2.019007e+09 2.019003e+09 15.0 8.0 1.0 2.0 \n", + "344977 2.019007e+09 2.019003e+09 15.0 8.0 2.0 2.0 \n", + "354031 2.019014e+09 2.019006e+09 13.0 6.0 1.0 2.0 \n", + "354032 2.019014e+09 2.019006e+09 13.0 6.0 2.0 2.0 \n", + "354318 2.019009e+09 2.019004e+09 13.0 6.0 2.0 2.0 \n", "\n", " IndIncome2002_B02ID \n", - "375894 1.0 \n", - "351919 1.0 \n", - "351920 3.0 \n", - "380675 1.0 \n", - "380676 3.0 \n", - "378401 1.0 " + "371335 2.0 \n", + "344976 1.0 \n", + "344977 1.0 \n", + "354031 2.0 \n", + "354032 1.0 \n", + "354318 1.0 " ] }, "metadata": {}, @@ -5525,8 +5479,7 @@ "# iterate over all items in the matches_hh_level_sample_list and apply the match_individuals function to each\n", "\n", "matches_list_of_dict = []\n", - "for i in range(len(matches_hh_level_sample_list)):\n", - " print(f'Processing item {i}')\n", + "for i in trange(len(matches_hh_level_sample_list)):\n", " # apply match_individuals function to each item in the list\n", " matches_ind = match_individuals(\n", " df1 = spc_edited,\n", @@ -5535,7 +5488,7 @@ " df1_id = 'hid',\n", " df2_id = 
'HouseholdID',\n", " matches_hh = matches_hh_level_sample_list[i],\n", - " show_progress= True)\n", + " show_progress= False)\n", "\n", " matches_list_of_dict.append(matches_ind)" ] @@ -5554,11 +5507,11 @@ "outputs": [], "source": [ "# random sample\n", - "with open('../data/interim/matching/matches_ind_level_categorical_random_sample.pkl', 'wb') as f:\n", + "with open(get_interim_path(\"matches_ind_level_categorical_random_sample.pkl\"), 'wb') as f:\n", " pkl.dump(matches_ind, f)\n", "\n", "# multiple random samples\n", - "with open('../data/interim/matching/matches_ind_level_categorical_random_sample_multiple.pkl', 'wb') as f:\n", + "with open(get_interim_path('matches_ind_level_categorical_random_sample_multiple.pkl'), 'wb') as f:\n", " pkl.dump(matches_list_of_dict, f)\n", "\n" ] @@ -5572,7 +5525,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -6002,7 +5955,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -6424,7 +6377,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -6484,7 +6437,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -6927,7 +6880,7 @@ "outputs": [], "source": [ "# save the file as a parquet file\n", - "spc_edited_copy.to_parquet('../data/interim/matching/spc_with_nts_trips.parquet')" + "spc_edited_copy.to_parquet(get_interim_path('spc_with_nts_trips.parquet'))" ] } ], @@ -6947,7 +6900,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/scripts/2_match_households_and_individuals.py b/scripts/2_match_households_and_individuals.py new file mode 100644 index 0000000..f607cab --- /dev/null +++ b/scripts/2_match_households_and_individuals.py @@ -0,0 +1,1312 @@ 
+#!/usr/bin/env python + +# # Adding activity chains to synthetic populations +# +# The purpose of this script is to match each individual in the synthetic population to a respondant from the [National Travel Survey (NTS)](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=5340). +# +# ### Methods +# +# We will try two methods +# +# 1. categorical matching: joining on relevant socio-demographic variables +# 2. statistical matching, as described in [An unconstrained statistical matching algorithm for combining individual and household level geo-specific census and survey data](https://doi.org/10.1016/j.compenvurbsys.2016.11.003). + +import os +import pickle as pkl + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from tqdm import tqdm, trange + +# from tqdm.notebook import trange +from acbm.matching import match_categorical, match_individuals +from acbm.preprocessing import ( + count_per_group, + # nts_filter_by_region, + nts_filter_by_year, + num_adult_child_hh, + transform_by_group, + truncate_values, +) + +pd.set_option("display.max_columns", None) + + +def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> str: + os.makedirs(path, exist_ok=True) + return f"{path}/{file_name}" + + +# ## Step 1: Load in the datasets + +# ### SPC + +# useful variables +region = "west-yorkshire" + + +# Read in the spc data (parquet format) +spc = pd.read_parquet("../data/external/spc_output/" + region + "_people_hh.parquet") +spc.head() + + +# select columns +spc = spc[ + [ + "id", + "household", + "pid_hs", + "msoa11cd", + "oa11cd", + "members", + "sic1d2007", + "sic2d2007", + "pwkstat", + "salary_yearly", + "salary_hourly", + "hid", + "accommodation_type", + "communal_type", + "num_rooms", + "central_heat", + "tenure", + "num_cars", + "sex", + "age_years", + "ethnicity", + "nssec8", + ] +] + + +# temporary reduction of the dataset for quick analysis +spc = spc.head(15000) +# spc = spc.head(500000) + + +# ### NTS +# 
+# The NTS is split up into multiple tables. We will load in the following tables: +# - individuals +# - households +# - trips + +path_psu = "../data/external/nts/UKDA-5340-tab/tab/psu_eul_2002-2022.tab" +psu = pd.read_csv(path_psu, sep="\t") + + +# #### Individuals + +path_individuals = "../data/external/nts/UKDA-5340-tab/tab/individual_eul_2002-2022.tab" +nts_individuals = pd.read_csv( + path_individuals, + sep="\t", + usecols=[ + "IndividualID", + "HouseholdID", + "PSUID", + "Age_B01ID", + "Age_B04ID", + "Sex_B01ID", + "OfPenAge_B01ID", + "HRPRelation_B01ID", + "EdAttn1_B01ID", + "EdAttn2_B01ID", + "EdAttn3_B01ID", + "OwnCycle_B01ID", # Owns a cycle + "DrivLic_B02ID", # type of driving license + "CarAccess_B01ID", + "IndIncome2002_B02ID", + "IndWkGOR_B02ID", # Region of usual place of work + "EcoStat_B02ID", # Working status of individual + "EcoStat_B03ID", + "NSSec_B03ID", # NSSEC high level breakdown + "SC_B01ID", # Social class of individual + "Stat_B01ID", # employee or self-employed + "WkMode_B01ID", # Usual means of travel to work + "WkHome_B01ID", # Work from home + "PossHom_B01ID", # Is it possible to work from home? + "OftHome_B01ID", # How often work from home + "TravSh_B01ID", # Usual mode from main food shopping trip + "SchDly_B01ID", # Daily school journey? + "SchTrav_B01ID", # Usual mode of travel to school + "SchAcc_B01ID", # IS school trip accompanied by an adult? + "FdShp_B01ID", # How do you usually carry ot main food shop (go to shop, online etc) + ], +) + + +# #### Households + +path_households = "../data/external/nts/UKDA-5340-tab/tab/household_eul_2002-2022.tab" +nts_households = pd.read_csv( + path_households, + sep="\t", + usecols=[ + "HouseholdID", + "PSUID", + "HHIncome2002_B02ID", + "AddressType_B01ID", # type of house + "Ten1_B02ID", # type of tenure + "HHoldNumAdults", # total no. of adults in household + "HHoldNumChildren", # total no. of children in household + "HHoldNumPeople", # total no. 
of people in household + "NumLicHolders", # total no. of driving license holders in household + "HHoldEmploy_B01ID", # number of employed in household + "NumBike", # no. of bikes + "NumCar", # no. of cars + "NumVanLorry", # no. of vans or lorries + "NumMCycle", # no. of motorcycles + "WalkBus_B01ID", # walk time from house to nearest bus stop + "Getbus_B01ID", # frequency of bus service + "WalkRail_B01ID", # walk time from house to nearest rail station + "JTimeHosp_B01ID", # journey time to nearest hospital + "DVShop_B01ID", # person no. for main food shooper in hh + "Settlement2011EW_B03ID", # ONS Urban/Rural: 2 categories + "Settlement2011EW_B04ID", # ONS Urban/Rural: 3 categories + "HHoldOAClass2011_B03ID", # Census 2011 OA Classification + "HRPWorkStat_B02ID", # HH ref person working status + "HRPSEGWorkStat_B01ID", # HH ref person socio economic group for active workers + "W0", # Unweighted interview sample + "W1", # Unweighted diary sample + "W2", # Weighted diary sample + "W3", # Weighted interview sample + ], +) + + +# #### Trips + +path_trips = "../data/external/nts/UKDA-5340-tab/tab/trip_eul_2002-2022.tab" +nts_trips = pd.read_csv( + path_trips, + sep="\t", + usecols=[ + "TripID", + "DayID", + "IndividualID", + "HouseholdID", + "PSUID", + "PersNo", + "TravDay", + "JourSeq", + "ShortWalkTrip_B01ID", + "NumStages", + "MainMode_B03ID", + "MainMode_B04ID", + "TripPurpFrom_B01ID", + "TripPurpTo_B01ID", + "TripPurpose_B04ID", + "TripStart", + "TripEnd", + "TripTotalTime", + "TripTravTime", + "TripDisIncSW", + "TripDisExSW", + "TripOrigGOR_B02ID", + "TripDestGOR_B02ID", + "W5", + "W5xHH", + ], +) + + +# #### Filter by year +# +# We will filter the NTS data to only include data from specific years. 
We can choose only 1 year, or multiple years to increase our sample size and the likelihood of a match with the spc + +years = [2019, 2021, 2022] + +nts_individuals = nts_filter_by_year(nts_individuals, psu, years) +nts_households = nts_filter_by_year(nts_households, psu, years) +nts_trips = nts_filter_by_year(nts_trips, psu, years) + + +# #### Filter by geography +# +# I will not do this for categorical matching, as it reduces the sample significantly, and leads to more spc households not being matched + +# regions = ['Yorkshire and the Humber', 'North West'] + +# nts_individuals = nts_filter_by_region(nts_individuals, psu, regions) +# nts_households = nts_filter_by_region(nts_households, psu, regions) +# nts_trips = nts_filter_by_region(nts_trips, psu, regions) + + +# Create dictionaries of key value pairs + +""" +guide to the dictionaries: + +_nts_hh: from NTS households table +_nts_ind: from NTS individuals table +_spc: from SPC + +""" + + +# ---------- NTS + +# Create a dictionary for the HHIncome2002_B02ID column +income_dict_nts_hh = { + "1": "0-25k", + "2": "25k-50k", + "3": "50k+", + "-8": "NA", + # should be -10, but + # it could be a typo in household_eul_2002-2022_ukda_data_dictionary + "-1": "DEAD", +} + +# Create a dictionary for the HHoldEmploy_B01ID column +# (PT: Part time, FT: Full time) +employment_dict_nts_hh = { + "1": "None", + "2": "0 FT, 1 PT", + "3": "1 FT, 0 PT", + "4": "0 FT, 2 PT", + "5": "1 FT, 1 PT", + "6": "2 FT, 0 PT", + "7": "1 FT, 2+ PT", + "8": "2 FT, 1+ PT", + "9": "0 FT, 3+ PT", + "10": "3+ FT, 0 PT", + "11": "3+ FT, 1+ PT", + "-8": "NA", + "-10": "DEAD", +} + +# Create a dictionary for the Ten1_B02ID column +tenure_dict_nts_hh = { + "1": "Owns / buying", + "2": "Rents", + "3": "Other (including rent free)", + "-8": "NA", + "-9": "DNA", + "-10": "DEAD", +} + + +# ---------- SPC + + +# create a dictionary for the pwkstat column +employment_dict_spc = { + "0": "Not applicable (age < 16)", + "1": "Employee FT", + "2": "Employee 
PT", + "3": "Employee unspecified", + "4": "Self-employed", + "5": "Unemployed", + "6": "Retired", + "7": "Homemaker/Maternal leave", + "8": "Student", + "9": "Long term sickness/disability", + "10": "Other", +} + + +# Create a dictionary for the tenure column +tenure_dict_spc = { + "1": "Owned: Owned outright", + "2": "Owned: Owned with a mortgage or loan or shared ownership", + "3": "Rented or living rent free: Total", + "4": "Rented: Social rented", + "5": "Rented: Private rented or living rent free", + "-8": "NA", + "-9": "DNA", + "-10": "DEAD", +} + + +# Combine the dictionaries into a dictionary of dictionaries + +dict_nts = { + "HHIncome2002_B02ID": income_dict_nts_hh, + "HHoldEmploy_B01ID": employment_dict_nts_hh, + "Ten1_B02ID": tenure_dict_nts_hh, +} + +dict_spc = {"pwkstat": employment_dict_spc, "tenure": tenure_dict_spc} + + +# ## Step 2: Decide on matching variables +# +# We need to identify the socio-demographic characteristics that we will match on. The schema for the synthetic population can be found [here](https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto). +# +# Matching between the SPC and the NTS will happen in two steps: +# +# 1. Match at the household level +# 2. Match individuals within the household +# +# ### Household level matching +# +# | Variable | Name (NTS) | Name (SPC) | Transformation (NTS) | Transformation (SPC) | +# | ------------------ | -------------------- | --------------- | -------------------- | -------------------- | +# | Household income | `HHIncome2002_BO2ID` | `salary_yearly` | NA | Group by household ID and sum | +# | Number of adults | `HHoldNumAdults` | `age_years` | NA | Group by household ID and count | +# | Number of children | `HHoldNumChildren` | `age_years` | NA | Group by household ID and count | +# | Employment status | `HHoldEmploy_B01ID` | `pwkstat` | NA | a) match to NTS categories. b) group by household ID | +# | Car ownership | `NumCar` | `num_cars` | SPC is capped at 2. 
We change all entries > 2 to 2 | NA | +# +# Other columns to match in the future +# | Variable | Name (NTS) | Name (SPC) | Transformation (NTS) | Transformation (SPC) | +# | ------------------ | -------------------- | --------------- | -------------------- | -------------------- | +# | Type of tenancy | `Ten1_B02ID` | `tenure` | ?? | ?? | +# | Urban-Rural classification of residence | `Settlement2011EW_B04ID` | NA | NA | Spatial join between [layer](https://www.gov.uk/government/collections/rural-urban-classification) and SPC | +# +# + +# ### 2.1 Edit SPC columns + +# #### Household Income +# +# Edit the spc so that we have household income as well as individual income. + +# add household income column for SPC +spc_edited = transform_by_group( + data=spc, + group_col="household", + transform_col="salary_yearly", + new_col="salary_yearly_hh", + transformation_type="sum", +) + + +# Check number of individuals and households with reported salaries + +# histogram for individuals and households (include NAs as 0) +fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) +ax[0].hist(spc_edited["salary_yearly"].fillna(0), bins=30) +ax[0].set_title("Salary yearly (Individuals)") +ax[0].set_xlabel("Salary yearly") +ax[0].set_ylabel("Frequency") +ax[1].hist(spc_edited["salary_yearly_hh"].fillna(0), bins=30) +ax[1].set_title("Salary yearly (Households)") +ax[1].set_xlabel("Salary yearly") + + +# statistics + +# print the total number of rows in the spc. 
Add a message "Values =" +print("Individuals in SPC =", spc_edited.shape[0]) +# number of individuals without reported income +print("Individuals without reported income =", spc_edited["salary_yearly"].isna().sum()) +# % of individuals with reported income (salary_yearly not equal NA) +print( + "% of individuals with reported income =", + round((spc_edited["salary_yearly"].count() / spc_edited.shape[0]) * 100, 1), +) +print( + "Individuals with reported income: 0 =", + spc_edited[spc_edited["salary_yearly"] == 0].shape[0], +) + + +# print the total number of households +print("Households in SPC =", spc_edited["household"].nunique()) +# number of households without reported income (salary yearly_hh = 0) +print( + "Households without reported income =", + spc_edited[spc_edited["salary_yearly_hh"] == 0].shape[0], +) +# # % of households with reported income (salary_yearly not equal NA) +print( + "% of households with reported income =", + round( + ( + spc_edited[spc_edited["salary_yearly_hh"] == 0].shape[0] + / spc_edited["household"].nunique() + ) + * 100, + 1, + ), +) +print( + "Households with reported income: 0 =", + spc_edited[spc_edited["salary_yearly_hh"] == 0].shape[0], +) + + +# --- Recode column so that it matches the reported NTS values (Use income_dict_nts_hh dictionary for reference) + +# Define the bins (first ) +bins = [0, 24999, 49999, np.inf] +# Define the labels for the bins +labels = [1, 2, 3] + +spc_edited = spc_edited.copy() + +spc_edited["salary_yearly_hh_cat"] = ( + pd.cut( + spc_edited["salary_yearly_hh"], bins=bins, labels=labels, include_lowest=True + ) + .astype("str") + .astype("float") +) + + +# replace NA values with -8 (to be consistent with NTS) +spc_edited["salary_yearly_hh_cat"] = spc_edited["salary_yearly_hh_cat"].fillna(-8) + +# Convert the column to int +spc_edited["salary_yearly_hh_cat"] = spc_edited["salary_yearly_hh_cat"].astype("int") + + +# If we compare household income from the SPC and the NTS, we find that the SPC has many 
more households with no reported income (-8). This will create an issue when matching using household income + +# bar plot showing spc_edited.salary_yearly_hh_cat and nts_households.HHIncome2002_B02ID side by side +fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) +ax[0].bar( + spc_edited["salary_yearly_hh_cat"].value_counts().index, + spc_edited["salary_yearly_hh_cat"].value_counts().values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Income Bracket - Household level") +ax[0].set_ylabel("No of Households") +ax[1].bar( + nts_households["HHIncome2002_B02ID"].value_counts().index, + nts_households["HHIncome2002_B02ID"].value_counts().values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Income Bracket - Household level") + + +# same as above but (%) +fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) +ax[0].bar( + spc_edited["salary_yearly_hh_cat"].value_counts(normalize=True).index, + spc_edited["salary_yearly_hh_cat"].value_counts(normalize=True).values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Income Bracket - Household level") +ax[0].set_ylabel("Fraction of Households") +ax[1].bar( + nts_households["HHIncome2002_B02ID"].value_counts(normalize=True).index, + nts_households["HHIncome2002_B02ID"].value_counts(normalize=True).values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Income Bracket - Household level") + + +# get the % of households in each income bracket for the nts +nts_households["HHIncome2002_B02ID"].value_counts(normalize=True) * 100 + + +# #### Household Composition (No. of Adults / Children) + +# Number of adults and children in the household + +spc_edited = num_adult_child_hh( + data=spc_edited, group_col="household", age_col="age_years" +) + + +# #### Employment Status + +# Employment status + +# check the colums values from our dictionary +dict_spc["pwkstat"], dict_nts["HHoldEmploy_B01ID"] + + +# The NTS only reports the number of Full time and Part time employees for each household. 
For the SPC we also need to get the number of full time and part time workers for each household. +# +# Step 1: Create a column for Full time and a column for Part time + +# We will only use '1' and '2' for the employment status + +counts_df = count_per_group( + df=spc_edited, + group_col="household", + count_col="pwkstat", + values=[1, 2], + value_names=["pwkstat_FT_hh", "pwkstat_PT_hh"], +) + +counts_df.head(10) + + +# Create a column that matches the NTS categories (m FT, n PT) + +# We want to match the SPC values to the NTS +dict_nts["HHoldEmploy_B01ID"] +""" +{ + '1': 'None', + '2': '0 FT, 1 PT', + '3': '1 FT, 0 PT', + '4': '0 FT, 2 PT', + '5': '1 FT, 1 PT', + '6': '2 FT, 0 PT', + '7': '1 FT, 2+ PT', + '8': '2 FT, 1+ PT', + '9': '0 FT, 3+ PT', + '10': '3+ FT, 0 PT', + '11': '3+ FT, 1+ PT', + '-8': 'NA', + '-10': 'DEAD'} + """ + +# 1) Match each row to the NTS + +# Define the conditions and outputs. +# We are using the keys in dict_nts['HHoldEmploy_B01ID'] as reference +conditions = [ + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] == 1), + (counts_df["pwkstat_FT_hh"] == 1) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] == 2), + (counts_df["pwkstat_FT_hh"] == 1) & (counts_df["pwkstat_PT_hh"] == 1), + (counts_df["pwkstat_FT_hh"] == 2) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] == 1) & (counts_df["pwkstat_PT_hh"] >= 2), + (counts_df["pwkstat_FT_hh"] == 2) & (counts_df["pwkstat_PT_hh"] >= 1), + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] >= 3), + (counts_df["pwkstat_FT_hh"] >= 3) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] >= 3) & (counts_df["pwkstat_PT_hh"] >= 1), +] + +# Define the corresponding outputs based on dict_nts['HHoldEmploy_B01ID] +outputs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + +# Create a new column using np.select 
+counts_df["pwkstat_NTS_match"] = np.select(conditions, outputs, default=-8) + + +# 2) merge back onto the spc +spc_edited = spc_edited.merge(counts_df, left_on="household", right_index=True) + +# check the output +spc_edited[ + ["household", "pwkstat", "pwkstat_FT_hh", "pwkstat_PT_hh", "pwkstat_NTS_match"] +].head(10) + + +# bar plot of counts_df['pwkstat_NTS_match'] and nts_households['HHoldEmploy_B01ID'] +fig, ax = plt.subplots(1, 2, figsize=(12, 6)) +ax[0].bar( + counts_df["pwkstat_NTS_match"].value_counts().index, + counts_df["pwkstat_NTS_match"].value_counts().values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Employment status - Household level") +ax[0].set_ylabel("Frequency") +ax[1].bar( + nts_households["HHoldEmploy_B01ID"].value_counts().index, + nts_households["HHoldEmploy_B01ID"].value_counts().values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Employment status - Household level") + + +# same as above but percentages +fig, ax = plt.subplots(1, 2, figsize=(12, 6)) +ax[0].bar( + counts_df["pwkstat_NTS_match"].value_counts().index, + counts_df["pwkstat_NTS_match"].value_counts(normalize=True).values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Employment status - Household level") +ax[0].set_ylabel("Frequency (normalized)") +ax[1].bar( + nts_households["HHoldEmploy_B01ID"].value_counts().index, + nts_households["HHoldEmploy_B01ID"].value_counts(normalize=True).values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Employment status - Household level") + + +# #### Urban Rural Classification +# +# We use the 2011 rural urban classification to match the SPC to the NTS. The NTS has 2 columns that we can use to match to the SPC: `Settlement2011EW_B03ID` and `Settlement2011EW_B04ID`. The `Settlement2011EW_B03ID` column is more general (urban / rural only), while the `Settlement2011EW_B04ID` column is more specific. We stick to the more general column for now. 
+ +# read the rural urban classification data +rural_urban = pd.read_csv("../data/external/census_2011_rural_urban.csv", sep=",") + +# merge the rural_urban data with the spc +spc_edited = spc_edited.merge( + rural_urban[["OA11CD", "RUC11", "RUC11CD"]], left_on="oa11cd", right_on="OA11CD" +) +spc_edited.head(5) + + +# create dictionary from the NTS `Settlement2011EW_B03ID` column +Settlement2011EW_B03ID_nts_hh = { + "1": "Urban", + "2": "Rural", + "3": "Scotland", + "-8": "NA", + "-10": "DEAD", +} + +Settlement2011EW_B04ID_nts_hh = { + "1": "Urban Conurbation", + "2": "Urban City and Town", + "3": "Rural Town and Fringe", + "4": "Rural Village, Hamlet and Isolated Dwellings", + "5": "Scotland", + "-8": "NA", + "-10": "DEAD", +} + + +census_2011_to_nts_B03ID = { + "Urban major conurbation": "Urban", + "Urban minor conurbation": "Urban", + "Urban city and town": "Urban", + "Urban city and town in a sparse setting": "Urban", + "Rural town and fringe": "Rural", + "Rural town and fringe in a sparse setting": "Rural", + "Rural village": "Rural", + "Rural village in a sparse setting": "Rural", + "Rural hamlets and isolated dwellings": "Rural", + "Rural hamlets and isolated dwellings in a sparse setting": "Rural", +} + +census_2011_to_nts_B04ID = { + "Urban major conurbation": "Urban Conurbation", + "Urban minor conurbation": "Urban Conurbation", + "Urban city and town": "Urban City and Town", + "Urban city and town in a sparse setting": "Urban City and Town", + "Rural town and fringe": "Rural Town and Fringe", + "Rural town and fringe in a sparse setting": "Rural Town and Fringe", + "Rural village": "Rural Village, Hamlet and Isolated Dwellings", + "Rural village in a sparse setting": "Rural Village, Hamlet and Isolated Dwellings", + "Rural hamlets and isolated dwellings": "Rural Village, Hamlet and Isolated Dwellings", + "Rural hamlets and isolated dwellings in a sparse setting": "Rural Village, Hamlet and Isolated Dwellings", +} + + +# add the nts Settlement2011EW_B03ID 
and Settlement2011EW_B04ID columns to the spc +spc_edited["Settlement2011EW_B03ID_spc"] = spc_edited["RUC11"].map( + census_2011_to_nts_B03ID +) +spc_edited["Settlement2011EW_B04ID_spc"] = spc_edited["RUC11"].map( + census_2011_to_nts_B04ID +) +spc_edited.head() + +# add the keys from nts_Settlement2011EW_B03ID and nts_Settlement2011EW_B04ID to the spc based on above mappings + +# reverse the dictionaries +Settlement2011EW_B03ID_nts_rev = { + v: k for k, v in Settlement2011EW_B03ID_nts_hh.items() +} +# map the values +spc_edited["Settlement2011EW_B03ID_spc_CD"] = ( + spc_edited["Settlement2011EW_B03ID_spc"] + .map(Settlement2011EW_B03ID_nts_rev) + .astype("int") +) + +Settlement2011EW_B04ID_nts_rev = { + v: k for k, v in Settlement2011EW_B04ID_nts_hh.items() +} +spc_edited["Settlement2011EW_B04ID_spc_CD"] = ( + spc_edited["Settlement2011EW_B04ID_spc"] + .map(Settlement2011EW_B04ID_nts_rev) + .astype("int") +) +spc_edited.head() + + +# ### 2.2 Edit NTS columns + +# #### Number of people of pension age + +nts_pensioners = count_per_group( + df=nts_individuals, + group_col="HouseholdID", + count_col="OfPenAge_B01ID", + values=[1], + value_names=["num_pension_age_nts"], +) + +nts_pensioners.head() + +# join onto the nts household df +nts_households = nts_households.merge( + nts_pensioners, left_on="HouseholdID", right_index=True, how="left" +) + + +# #### Number of cars +# +# - `SPC.num_cars` only has values [0, 1, 2]. 2 is for all households with 2 or more cars +# - `NTS.NumCar` is more detailed. It has the actual value of the number of cars. We will cap this at 2. + +# Create a new column in NTS +nts_households.loc[:, "NumCar_SPC_match"] = nts_households["NumCar"].apply( + truncate_values, upper=2 +) + +nts_households[["NumCar", "NumCar_SPC_match"]].head(20) + + +# #### Type of tenancy +# +# Breakdown between NTS and SPC is different. 
+ +dict_nts["Ten1_B02ID"], dict_spc["tenure"] + + +# Create dictionaries to map tenure onto the spc and nts dfs + +# Dictionary showing how we want the final columns to look like +tenure_dict_nts_spc = { + 1: "Owned", + 2: "Rented or rent free", + -8: "NA", + -9: "DNA", + -10: "DEAD", +} + +# Matching NTS to tenure_dict_nts_spc + +# Create a new dictionary for matching +matching_dict_nts_tenure = {1: 1, 2: 2, 3: 2} + +matching_dict_spc_tenure = { + 1: 1, #'Owned: Owned outright' : 'Owned' + 2: 1, #'Owned: Owned with a mortgage or loan or shared ownership', : 'Owned' + 3: 2, #'Rented or living rent free: Total', : 'Rented or rent free' + 4: 2, #'Rented: Social rented', : 'Rented or rent free' + 5: 2, #'Rented: Private rented or living rent free', : 'Rented or rent free' +} + + +# map dictionaries to create comparable columns + +# Create a new column in nts_households +nts_households["tenure_nts_for_matching"] = ( + nts_households["Ten1_B02ID"] + .map(matching_dict_nts_tenure) # map the values to the new dictionary + .fillna(nts_households["Ten1_B02ID"]) +) # fill the NaNs with the original values + +# Create a new column in spc +spc_edited["tenure_spc_for_matching"] = ( + spc_edited["tenure"] + .map(matching_dict_spc_tenure) # map the values to the new dictionary + .fillna(spc_edited["tenure"]) +) # fill the NaNs with the original values + + +# ## Step 3: Matching at Household Level +# +# Now that we've prepared all the columns, we can start matching. 
+ +# ### 3.1 Categorical matching +# +# We will match on (a subset of) the following columns: +# +# | Matching variable | NTS column | SPC column | +# | ------------------| ---------- | ---------- | +# | Household income | `HHIncome2002_BO2ID` | `salary_yearly_hh_cat` | +# | Number of adults | `HHoldNumAdults` | `num_adults` | +# | Number of children | `HHoldNumChildren` | `num_children` | +# | Employment status | `HHoldEmploy_B01ID` | `pwkstat_NTS_match` | +# | Car ownership | `NumCar_SPC_match` | `num_cars` | +# | Type of tenancy | `tenure_nts_for_matching` | `tenure_spc_for_matching` | +# | Rural/Urban Classification | `Settlement2011EW_B03ID` | `Settlement2011EW_B03ID_spc_CD` | + +# Prepare SPC df for matching + +# Select multiple columns +spc_matching = spc_edited[ + [ + "hid", + "salary_yearly_hh_cat", + "num_adults", + "num_children", + "num_pension_age", + "pwkstat_NTS_match", + "num_cars", + "tenure_spc_for_matching", + "Settlement2011EW_B03ID_spc_CD", + "Settlement2011EW_B04ID_spc_CD", + ] +] + +# edit the df so that we have one row per hid +spc_matching = spc_matching.drop_duplicates(subset="hid") + +spc_matching.head(10) + + +# Prepare NTS df for matching + +nts_matching = nts_households[ + [ + "HouseholdID", + "HHIncome2002_B02ID", + "HHoldNumAdults", + "HHoldNumChildren", + "num_pension_age_nts", + "HHoldEmploy_B01ID", + "NumCar_SPC_match", + "tenure_nts_for_matching", + "Settlement2011EW_B03ID", + "Settlement2011EW_B04ID", + ] +] + +nts_matching.head(10) + + +# Dictionary of matching columns. 
We extract column names from this dictionary when matching on a subset of the columns
of keys where value is na +na_count = sum([1 for v in matches_hh_level.values() if pd.isna(v).all()]) + + +print(na_count, "households in the SPC had no match") +print( + round((na_count / len(matches_hh_level)) * 100, 1), + "% of households in the SPC had no match", +) + + +# print the 6th key, value in the matches_hh_level dictionary +print(list(matches_hh_level.items())[90]) + + +## add matches_hh_level as a column in spc_edited +spc_edited["nts_hh_id"] = spc_edited["hid"].map(matches_hh_level) + +spc_edited.head(5) + + +# ### Random Sampling from matched households +# +# In categorical matching, many households in the SPC are matched to more than 1 household in the NTS. Which household to choose? We do random sampling + +# for each key in the dictionary, sample 1 of the values associated with it and store it in a new dictionary + +""" +- iterate over each key-value pair in the matches_hh_result dictionary. +- For each key-value pair, use np.random.choice(value) to randomly select +one item from the list of values associated with the current key. +- create a new dictionary hid_to_HouseholdID_sample where each key from the +original dictionary is associated with one randomly selected value from the +original list of values. 
+ +""" +matches_hh_level_sample = { + key: np.random.choice(value) for key, value in matches_hh_level.items() +} + +# remove items in list where value is nan +matches_hh_level_sample = { + key: value for key, value in matches_hh_level_sample.items() if not pd.isna(value) +} + + +print(list(matches_hh_level_sample.items())[568]) + + +# Multiple matches in case we want to try stochastic runs + +# same logic as cell above, but repeat it multiple times and store each result as a separate dictionary in a list +matches_hh_level_sample_list = [ + {key: np.random.choice(value) for key, value in matches_hh_level.items()} + for i in range(100) +] + +# matches_hh_level_sample_list + + +# Save results + +# random sample +with open( + get_interim_path("matches_hh_level_categorical_random_sample.pkl"), "wb" +) as f: + pkl.dump(matches_hh_level_sample, f) + +# multiple random samples +with open( + get_interim_path("matches_hh_level_categorical_random_sample_multiple.pkl"), "wb" +) as f: + pkl.dump(matches_hh_level_sample_list, f) + + +# Do the same at the df level. Add nts_hh_id_sample column to the spc df + +# # for each hid in spc_edited, sample a value from the nts_hh_id col. +# spc_edited['nts_hh_id_sample'] = spc_edited['nts_hh_id'].apply(lambda x: np.random.choice(x) if x is not np.nan else np.nan) +# # All rows with the same 'hid' should have the same value for 'nts_hh_id_sample'. 
Group by hid and assign the first value to all rows in the group +# spc_edited['nts_hh_id_sample'] = spc_edited.groupby('hid')['nts_hh_id_sample'].transform('first') + +# spc_edited.head(10) + + +# ## Step 4: Matching at Individual Level +# +# 1) Prepare columns for matching - they should all be numerical +# a) age_years in the SPC -> Convert from actual age to age brackets from the dictionary +# 2) Filter to specific household +# 3) Nearest neighbor merge without replacement (edit while function below) +# +# + +nts_individuals.head() + + +# Create an 'age' column in the SPC that matches the NTS categories + +# create a dictionary for reference on how the labels for "Age_B04ID" match the actual age brackets + +# dict_nts_ind_age = {-10: 'DEAD', +# -8: 'NA', +# 1: '0-4', +# 2: '5-10', +# 3: '11-16', +# 4: '17-20', +# 5: '21-29', +# 6: '30-39', +# 7: '40-49', +# 8: '50-59', +# 9: '60+' +# } + + +# Define the bins and labels based on dict_nts_ind_age +bins = [0, 4, 10, 16, 20, 29, 39, 49, 59, np.inf] +labels = [1, 2, 3, 4, 5, 6, 7, 8, 9] + +# Create a new column in spc_edited that maps the age_years to the keys of dict_nts_ind_age +spc_edited["age_group"] = ( + pd.cut(spc_edited["age_years"], bins=bins, labels=labels).astype("int").fillna(-8) +) + + +# rename nts columns in preparation for matching + +nts_individuals.rename( + columns={"Age_B04ID": "age_group", "Sex_B01ID": "sex"}, inplace=True +) + + +# PSM matching using internal match_individuals function + +matches_ind = match_individuals( + df1=spc_edited, + df2=nts_individuals, + matching_columns=["age_group", "sex"], + df1_id="hid", + df2_id="HouseholdID", + matches_hh=matches_hh_level_sample, + show_progress=False, +) + +# matches_ind + + +import itertools + +# Output the first n items of the dictionary +dict(itertools.islice(matches_ind.items(), 10)) + + +# Add matches_ind values to spc_edited using map +spc_edited["nts_ind_id"] = spc_edited.index.map(matches_ind) + +# add the nts_individuals.IndividualID to 
spc_edited. The current nts_ind_id is the row index of nts_individuals
In `matches_hh_level_sample_list` we did many iterations of random sampling to produce multiple results of household matching, and saved the output in a list of dictionaries. +# +# Here, we iterate over the list and do individual matching for each item. The output is a list of n dictionaries, each of which could be used as a synthetic population matched to the NTS + +# iterate over all items in the matches_hh_level_sample_list and apply the match_individuals function to each + +matches_list_of_dict = [] +for i in trange(len(matches_hh_level_sample_list)): + # apply match_individuals function to each item in the list + matches_ind = match_individuals( + df1=spc_edited, + df2=nts_individuals, + matching_columns=["age_group", "sex"], + df1_id="hid", + df2_id="HouseholdID", + matches_hh=matches_hh_level_sample_list[i], + show_progress=False, + ) + + matches_list_of_dict.append(matches_ind) + + +# Save the results of individual matching + +# random sample +with open( + get_interim_path("matches_ind_level_categorical_random_sample.pkl"), "wb" +) as f: + pkl.dump(matches_ind, f) + +# multiple random samples +with open( + get_interim_path("matches_ind_level_categorical_random_sample_multiple.pkl"), "wb" +) as f: + pkl.dump(matches_list_of_dict, f) + + +# ### Add trip data +# + +nts_trips.head(10) + + +# Rename columns and map actual modes and trip purposes to the trip table. 
+# +# Code taken from: https://github.com/arup-group/pam/blob/main/examples/07_travel_survey_to_matsim.ipynb + +nts_trips = nts_trips.rename( + columns={ # rename data + "JourSeq": "seq", + "TripOrigGOR_B02ID": "ozone", + "TripDestGOR_B02ID": "dzone", + "TripPurpFrom_B01ID": "oact", + "TripPurpTo_B01ID": "dact", + "MainMode_B04ID": "mode", + "TripStart": "tst", + "TripEnd": "tet", + } +) + +nts_trips.head(10) + + +mode_mapping = { + 1: "walk", + 2: "bike", + 3: "car", #'Car/van driver' + 4: "car", #'Car/van driver' + 5: "motorcycle", #'Motorcycle', + 6: "car", #'Other private transport', + 7: "pt", # Bus in London', + 8: "pt", #'Other local bus', + 9: "pt", #'Non-local bus', + 10: "pt", #'London Underground', + 11: "pt", #'Surface Rail', + 12: "car", #'Taxi/minicab', + 13: "pt", #'Other public transport', + -10: "DEAD", + -8: "NA", +} + +purp_mapping = { + 1: "work", + 2: "work", #'In course of work', + 3: "education", + 4: "shop", #'Food shopping', + 5: "shop", #'Non food shopping', + 6: "medical", #'Personal business medical', + 7: "other", #'Personal business eat/drink', + 8: "other", #'Personal business other', + 9: "other", #'Eat/drink with friends', + 10: "visit", #'Visit friends', + 11: "other", #'Other social', + 12: "other", #'Entertain/ public activity', + 13: "other", #'Sport: participate', + 14: "home", #'Holiday: base', + 15: "other", #'Day trip/just walk', + 16: "other", #'Other non-escort', + 17: "escort", #'Escort home', + 18: "escort", #'Escort work', + 19: "escort", #'Escort in course of work', + 20: "escort", #'Escort education', + 21: "escort", #'Escort shopping/personal business', + 22: "escort", #'Other escort', + 23: "home", #'Home', + -10: "DEAD", + -8: "NA", +} + + +nts_trips["mode"] = nts_trips["mode"].map(mode_mapping) + +nts_trips["oact"] = nts_trips["oact"].map(purp_mapping) + +nts_trips["dact"] = nts_trips["dact"].map(purp_mapping) + + +nts_trips.head(10) + + +# create an independant copy of spc_edited +spc_edited_copy = 
spc_edited.copy() + +# replace non-finite values with a default value +spc_edited_copy["nts_ind_id"].fillna(-1, inplace=True) +# convert the nts_ind_id column to int for merging +spc_edited_copy["nts_ind_id"] = spc_edited_copy["nts_ind_id"].astype(int) + +# merge the copy with nts_trips using IndividualID +spc_edited_copy = spc_edited_copy.merge( + nts_trips, left_on="nts_ind_id", right_on="IndividualID", how="left" +) + + +spc_edited_copy.head(10) + + +# save the file as a parquet file +spc_edited_copy.to_parquet(get_interim_path("spc_with_nts_trips.parquet")) From 9ad7fae5ef23912f5f05ec4d832ec01b02ce285a Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 29 Apr 2024 11:20:36 +0100 Subject: [PATCH 04/16] Move first comment to README --- scripts/2_match_households_and_individuals.py | 41 ++++++++----------- scripts/README.md | 16 +++++++- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/scripts/2_match_households_and_individuals.py b/scripts/2_match_households_and_individuals.py index f607cab..875181e 100644 --- a/scripts/2_match_households_and_individuals.py +++ b/scripts/2_match_households_and_individuals.py @@ -1,25 +1,11 @@ -#!/usr/bin/env python - -# # Adding activity chains to synthetic populations -# -# The purpose of this script is to match each individual in the synthetic population to a respondant from the [National Travel Survey (NTS)](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=5340). -# -# ### Methods -# -# We will try two methods -# -# 1. categorical matching: joining on relevant socio-demographic variables -# 2. statistical matching, as described in [An unconstrained statistical matching algorithm for combining individual and household level geo-specific census and survey data](https://doi.org/10.1016/j.compenvurbsys.2016.11.003). 
- import os import pickle as pkl import matplotlib.pyplot as plt import numpy as np import pandas as pd -from tqdm import tqdm, trange +from tqdm import trange -# from tqdm.notebook import trange from acbm.matching import match_categorical, match_individuals from acbm.preprocessing import ( count_per_group, @@ -45,11 +31,8 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> # useful variables region = "west-yorkshire" - # Read in the spc data (parquet format) spc = pd.read_parquet("../data/external/spc_output/" + region + "_people_hh.parquet") -spc.head() - # select columns spc = spc[ @@ -81,6 +64,7 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> # temporary reduction of the dataset for quick analysis +# TODO: check if this should be present? spc = spc.head(15000) # spc = spc.head(500000) @@ -214,7 +198,9 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> # #### Filter by year # -# We will filter the NTS data to only include data from specific years. We can choose only 1 year, or multiple years to increase our sample size and the likelihood of a match with the spc +# We will filter the NTS data to only include data from specific years. We can choose +# only 1 year, or multiple years to increase our sample size and the likelihood of a +# match with the spc. 
years = [2019, 2021, 2022] @@ -225,7 +211,8 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> # #### Filter by geography # -# I will not do this for categorical matching, as it reduces the sample significantly, and leads to more spc households not being matched +# I will not do this for categorical matching, as it reduces the sample significantly, +# and leads to more spc households not being matched # regions = ['Yorkshire and the Humber', 'North West'] @@ -333,7 +320,8 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> # ## Step 2: Decide on matching variables # -# We need to identify the socio-demographic characteristics that we will match on. The schema for the synthetic population can be found [here](https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto). +# We need to identify the socio-demographic characteristics that we will match on. The +# schema for the synthetic population can be found [here](https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto). # # Matching between the SPC and the NTS will happen in two steps: # @@ -429,7 +417,8 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> ) -# --- Recode column so that it matches the reported NTS values (Use income_dict_nts_hh dictionary for reference) +# --- Recode column so that it matches the reported NTS values (Use income_dict_nts_hh +# dictionary for reference) # Define the bins (first ) bins = [0, 24999, 49999, np.inf] @@ -454,7 +443,9 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> spc_edited["salary_yearly_hh_cat"] = spc_edited["salary_yearly_hh_cat"].astype("int") -# If we compare household income from the SPC and the NTS, we find that the SPC has many more households with no reported income (-8). 
This will create an issue when matching using household income +# If we compare household income from the SPC and the NTS, we find that the SPC has many +# more households with no reported income (-8). This will create an issue when matching +# using household income # bar plot showing spc_edited.salary_yearly_hh_cat and nts_households.HHIncome2002_B02ID side by side fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) @@ -511,7 +502,9 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> dict_spc["pwkstat"], dict_nts["HHoldEmploy_B01ID"] -# The NTS only reports the number of Full time and Part time employees for each household. For the SPC we also need to get the number of full time and part time workers for each household. +# The NTS only reports the number of Full time and Part time employees for each +# household. For the SPC we also need to get the number of full time and part-time +# workers for each household. # # Step 1: Create a column for Full time and a column for Part time diff --git a/scripts/README.md b/scripts/README.md index 6a79279..7b58f93 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,5 +1,11 @@ # Preparing synthetic population scripts +## Datasets +- [Synthetic Population Catalyst](https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/README.md) +- [National Travel Survey](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=5340) +- [Rural Urban Classification 2011 classification](https://geoportal.statistics.gov.uk/datasets/53360acabd1e4567bc4b8d35081b36ff/about) +- [OA centroids](): TODO + ## Loading in the SPC synthetic population Use the code in the `Quickstart` [here](https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/README.md) @@ -16,10 +22,16 @@ You have two options: --rng-seed 0 \ --flat-output \ --year 2020 \ - config/England/west-yorkshire.txt --year 2020 + config/England/west-yorkshire.txt 
``` and replace `west-yorkshire` and `2020` with your preferred option. ## Matching -TODO +### Adding activity chains to synthetic populations +The purpose of this script is to match each individual in the synthetic population to a respondant from the [National Travel Survey (NTS)](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=5340). + +### Methods +We will try two methods: + 1. categorical matching: joining on relevant socio-demographic variables + 2. statistical matching, as described in [An unconstrained statistical matching algorithm for combining individual and household level geo-specific census and survey data](https://doi.org/10.1016/j.compenvurbsys.2016.11.003). From cc88c35a6ad6ca6fc183b09381a7d17aaaeaa276 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 29 Apr 2024 11:21:15 +0100 Subject: [PATCH 05/16] Ignore line too long lint E501 for comments --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1c971dc..b6f2842 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ uatk-spc = {git = "https://github.com/alan-turing-institute/uatk-spc.git", rev = geopandas = "^0.14.3" matplotlib = "^3.8.3" scikit-learn = "^1.4.1.post1" +tqdm = "^4.66.2" [tool.poetry.dev-dependencies] pytest = ">= 6" @@ -69,7 +70,7 @@ port.exclude_lines = [ [tool.ruff] src = ["src"] exclude = [] -line-length = 88 # how long you want lines to be +line-length = 88 # how long you want lines to be [tool.ruff.format] docstring-code-format = true # code snippets in docstrings will be formatted @@ -96,9 +97,11 @@ select = [ "YTT", # flake8-2020 "EXE", # flake8-executable ] + ignore = [ "PLR", # Design related pylint codes "ISC001", # Conflicts with formatter + "E501" # Line too long ] unfixable = [ "F401", # Would remove unused imports From 806fb297097efd22dc2da1029d3b8a553b4ec198 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 29 Apr 2024 11:25:50 +0100 Subject: [PATCH 
06/16] Fixes for pre-commit --- scripts/2_match_households_and_individuals.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/scripts/2_match_households_and_individuals.py b/scripts/2_match_households_and_individuals.py index 875181e..3ab577e 100644 --- a/scripts/2_match_households_and_individuals.py +++ b/scripts/2_match_households_and_individuals.py @@ -1,9 +1,11 @@ +import itertools import os import pickle as pkl import matplotlib.pyplot as plt import numpy as np import pandas as pd +from IPython.display import display from tqdm import trange from acbm.matching import match_categorical, match_individuals @@ -871,7 +873,6 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> matching_ids, spc_matching, nts_matching ) } -matching_dfs_dict # #### Match on a subset of columns (exclude salary, tenure, and employment status) @@ -1067,8 +1068,6 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> # matches_ind -import itertools - # Output the first n items of the dictionary dict(itertools.islice(matches_ind.items(), 10)) @@ -1108,12 +1107,6 @@ def get_interim_path(file_name: str, path: str = "../data/interim/matching/") -> spc_rows_df = pd.concat(spc_rows) nts_rows_df = pd.concat(nts_rows) - -spc_rows_df - - -from IPython.display import display - display( spc_rows_df[ [ From 8f83ec846d6971c0385ca822833d692b25971cdb Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 29 Apr 2024 11:49:36 +0100 Subject: [PATCH 07/16] Convert 2.1 notebook to script, fix notebook lint --- notebooks/2.1_sandbox-match_households.ipynb | 285 ++-- scripts/2.1_sandbox-match_households.py | 1297 ++++++++++++++++++ 2 files changed, 1432 insertions(+), 150 deletions(-) create mode 100644 scripts/2.1_sandbox-match_households.py diff --git a/notebooks/2.1_sandbox-match_households.ipynb b/notebooks/2.1_sandbox-match_households.ipynb index 7333a8f..60a7d30 100644 --- 
a/notebooks/2.1_sandbox-match_households.ipynb +++ b/notebooks/2.1_sandbox-match_households.ipynb @@ -20,6 +20,8 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", + "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", @@ -27,7 +29,6 @@ "from acbm.preprocessing import (\n", " count_per_group,\n", " match_coverage_col,\n", - " nts_filter_by_region,\n", " nts_filter_by_year,\n", " num_adult_child_hh,\n", " transform_by_group,\n", @@ -90,7 +91,6 @@ " id\n", " household\n", " workplace\n", - " location\n", " events\n", " weekday_diaries\n", " weekend_diaries\n", @@ -98,8 +98,8 @@ " id_tus_hh\n", " id_tus_p\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " bmi\n", " has_cardiovascular_disease\n", @@ -115,6 +115,7 @@ " salary_yearly\n", " salary_hourly\n", " hid\n", + " nssec8\n", " accommodation_type\n", " communal_type\n", " num_rooms\n", @@ -124,7 +125,7 @@ " sex\n", " age_years\n", " ethnicity\n", - " nssec8\n", + " nssec8_household\n", " \n", " \n", " \n", @@ -133,7 +134,6 @@ " 0\n", " 0\n", " NaN\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " {'concert_f': 1.2791347489984115e-31, 'concert...\n", " [1583, 13161]\n", " [1582, 13160]\n", @@ -159,6 +159,7 @@ " NaN\n", " E02002183_0001\n", " 1.0\n", + " 1.0\n", " NaN\n", " 2.0\n", " True\n", @@ -174,7 +175,6 @@ " 1\n", " 1\n", " NaN\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " {'concert_f': 9.743248151956307e-21, 'concert_...\n", " [2900, 4948, 4972, 7424, 10284, 10586, 12199, ...\n", " [2901, 4949, 4973, 7425, 10285, 10585, 12198, ...\n", @@ -199,6 +199,7 @@ " NaN\n", " NaN\n", " E02002183_0002\n", + " 1.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -215,7 +216,6 @@ " 2\n", " 1\n", " NaN\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " {'concert_f': 8.46716103992468e-16, 'concert_f...\n", " [3010, 6389, 9448, 10184, 11598]\n", " [3011, 6388, 9447, 10183, 11599]\n", @@ -240,6 +240,7 @@ " 
NaN\n", " NaN\n", " E02002183_0002\n", + " 1.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -256,7 +257,6 @@ " 3\n", " 2\n", " 56126.0\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " {'concert_f': 1.8844366073608398, 'concert_fs'...\n", " [366, 867, 2096, 3678, 5212, 5450, 8145, 9254,...\n", " [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,...\n", @@ -281,6 +281,7 @@ " 32857.859375\n", " 14.360952\n", " E02002183_0003\n", + " 4.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -297,7 +298,6 @@ " 4\n", " 2\n", " NaN\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " {'concert_f': 4.877435207366943, 'concert_fs':...\n", " [1289, 12528, 12870]\n", " [1288, 12529, 12871]\n", @@ -322,6 +322,7 @@ " 18162.451172\n", " 9.439944\n", " E02002183_0003\n", + " 4.0\n", " 3.0\n", " NaN\n", " 6.0\n", @@ -345,13 +346,6 @@ "3 3 2 56126.0 \n", "4 4 2 NaN \n", "\n", - " location \\\n", - "0 {'x': -1.7892179489135742, 'y': 53.91915130615... \n", - "1 {'x': -1.8262380361557007, 'y': 53.92028045654... \n", - "2 {'x': -1.8262380361557007, 'y': 53.92028045654... \n", - "3 {'x': -1.8749940395355225, 'y': 53.94298934936... \n", - "4 {'x': -1.8749940395355225, 'y': 53.94298934936... \n", - "\n", " events \\\n", "0 {'concert_f': 1.2791347489984115e-31, 'concert... \n", "1 {'concert_f': 9.743248151956307e-21, 'concert_... \n", @@ -373,7 +367,7 @@ "3 [365, 868, 2097, 3677, 5213, 5451, 8146, 9253,... 
E02002183_0003_001 \n", "4 [1288, 12529, 12871] E02002183_0003_002 \n", "\n", - " id_tus_hh id_tus_p pid_hs msoa oa members bmi \\\n", + " id_tus_hh id_tus_p pid_hs msoa11cd oa11cd members bmi \\\n", "0 11291218 1 2905399 E02002183 E00053954 [0] 24.879356 \n", "1 17291219 1 2905308 E02002183 E00053953 [1, 2] 27.491207 \n", "2 17070713 2 2907681 E02002183 E00053953 [1, 2] 17.310829 \n", @@ -401,19 +395,19 @@ "3 31.0 3422.0 1 32857.859375 14.360952 E02002183_0003 \n", "4 62.0 7214.0 1 18162.451172 9.439944 E02002183_0003 \n", "\n", - " accommodation_type communal_type num_rooms central_heat tenure \\\n", - "0 1.0 NaN 2.0 True 2.0 \n", - "1 3.0 NaN 6.0 True 2.0 \n", - "2 3.0 NaN 6.0 True 2.0 \n", - "3 3.0 NaN 6.0 True 2.0 \n", - "4 3.0 NaN 6.0 True 2.0 \n", + " nssec8 accommodation_type communal_type num_rooms central_heat tenure \\\n", + "0 1.0 1.0 NaN 2.0 True 2.0 \n", + "1 1.0 3.0 NaN 6.0 True 2.0 \n", + "2 1.0 3.0 NaN 6.0 True 2.0 \n", + "3 4.0 3.0 NaN 6.0 True 2.0 \n", + "4 4.0 3.0 NaN 6.0 True 2.0 \n", "\n", - " num_cars sex age_years ethnicity nssec8 \n", - "0 2 1 86 1 1.0 \n", - "1 2 1 74 3 1.0 \n", - "2 2 2 68 1 2.0 \n", - "3 1 1 27 1 4.0 \n", - "4 1 2 26 1 6.0 " + " num_cars sex age_years ethnicity nssec8_household \n", + "0 2 1 86 1 1.0 \n", + "1 2 1 74 3 1.0 \n", + "2 2 2 68 1 2.0 \n", + "3 1 1 27 1 4.0 \n", + "4 1 2 26 1 6.0 " ] }, "execution_count": 3, @@ -434,8 +428,8 @@ "outputs": [], "source": [ "# select columns\n", - "spc = spc[['id', 'household', 'location', 'pid_hs',\n", - " 'msoa', 'oa', 'members', 'sic1d2007', 'sic2d2007',\n", + "spc = spc[['id', 'household', 'pid_hs',\n", + " 'msoa11cd', 'oa11cd', 'members', 'sic1d2007', 'sic2d2007',\n", " 'pwkstat', 'salary_yearly', 'salary_hourly', 'hid',\n", " 'accommodation_type', 'communal_type', 'num_rooms', 'central_heat',\n", " 'tenure', 'num_cars', 'sex', 'age_years', 'ethnicity', 'nssec8']]" @@ -1508,10 +1502,9 @@ " \n", " id\n", " household\n", - " location\n", " pid_hs\n", - " msoa\n", - " oa\n", + 
" msoa11cd\n", + " oa11cd\n", " members\n", " sic1d2007\n", " sic2d2007\n", @@ -1550,7 +1543,6 @@ " 0\n", " 0\n", " 0\n", - " {'x': -1.7892179489135742, 'y': 53.91915130615...\n", " 2905399\n", " E02002183\n", " E00053954\n", @@ -1590,7 +1582,6 @@ " 1\n", " 1\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2905308\n", " E02002183\n", " E00053953\n", @@ -1630,7 +1621,6 @@ " 2\n", " 2\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2907681\n", " E02002183\n", " E00053953\n", @@ -1650,7 +1640,7 @@ " 2\n", " 68\n", " 1\n", - " 2.0\n", + " 1.0\n", " 0.000000\n", " 1\n", " 1\n", @@ -1670,7 +1660,6 @@ " 3\n", " 3\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2902817\n", " E02002183\n", " E00053689\n", @@ -1710,7 +1699,6 @@ " 4\n", " 4\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2900884\n", " E02002183\n", " E00053689\n", @@ -1730,7 +1718,7 @@ " 2\n", " 26\n", " 1\n", - " 6.0\n", + " 4.0\n", " 51020.310547\n", " 3\n", " 1\n", @@ -1751,54 +1739,54 @@ "" ], "text/plain": [ - " id household location pid_hs \\\n", - "0 0 0 {'x': -1.7892179489135742, 'y': 53.91915130615... 2905399 \n", - "1 1 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2905308 \n", - "2 2 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2907681 \n", - "3 3 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 2902817 \n", - "4 4 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 
2900884 \n", + " id household pid_hs msoa11cd oa11cd members sic1d2007 sic2d2007 \\\n", + "0 0 0 2905399 E02002183 E00053954 [0] J 58.0 \n", + "1 1 1 2905308 E02002183 E00053953 [1, 2] C 25.0 \n", + "2 2 1 2907681 E02002183 E00053953 [1, 2] P 85.0 \n", + "3 3 2 2902817 E02002183 E00053689 [3, 4] C 31.0 \n", + "4 4 2 2900884 E02002183 E00053689 [3, 4] J 62.0 \n", "\n", - " msoa oa members sic1d2007 sic2d2007 pwkstat salary_yearly \\\n", - "0 E02002183 E00053954 [0] J 58.0 6 NaN \n", - "1 E02002183 E00053953 [1, 2] C 25.0 6 NaN \n", - "2 E02002183 E00053953 [1, 2] P 85.0 6 NaN \n", - "3 E02002183 E00053689 [3, 4] C 31.0 1 32857.859375 \n", - "4 E02002183 E00053689 [3, 4] J 62.0 1 18162.451172 \n", + " pwkstat salary_yearly salary_hourly hid accommodation_type \\\n", + "0 6 NaN NaN E02002183_0001 1.0 \n", + "1 6 NaN NaN E02002183_0002 3.0 \n", + "2 6 NaN NaN E02002183_0002 3.0 \n", + "3 1 32857.859375 14.360952 E02002183_0003 3.0 \n", + "4 1 18162.451172 9.439944 E02002183_0003 3.0 \n", "\n", - " salary_hourly hid accommodation_type communal_type \\\n", - "0 NaN E02002183_0001 1.0 NaN \n", - "1 NaN E02002183_0002 3.0 NaN \n", - "2 NaN E02002183_0002 3.0 NaN \n", - "3 14.360952 E02002183_0003 3.0 NaN \n", - "4 9.439944 E02002183_0003 3.0 NaN \n", + " communal_type num_rooms central_heat tenure num_cars sex age_years \\\n", + "0 NaN 2.0 True 2.0 2 1 86 \n", + "1 NaN 6.0 True 2.0 2 1 74 \n", + "2 NaN 6.0 True 2.0 2 2 68 \n", + "3 NaN 6.0 True 2.0 1 1 27 \n", + "4 NaN 6.0 True 2.0 1 2 26 \n", "\n", - " num_rooms central_heat tenure num_cars sex age_years ethnicity \\\n", - "0 2.0 True 2.0 2 1 86 1 \n", - "1 6.0 True 2.0 2 1 74 3 \n", - "2 6.0 True 2.0 2 2 68 1 \n", - "3 6.0 True 2.0 1 1 27 1 \n", - "4 6.0 True 2.0 1 2 26 1 \n", + " ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult \\\n", + "0 1 1.0 0.000000 1 1 \n", + "1 3 1.0 0.000000 1 1 \n", + "2 1 1.0 0.000000 1 1 \n", + "3 1 4.0 51020.310547 3 1 \n", + "4 1 4.0 51020.310547 3 1 \n", "\n", - " nssec8 
salary_yearly_hh salary_yearly_hh_cat is_adult num_adults \\\n", - "0 1.0 0.000000 1 1 1 \n", - "1 1.0 0.000000 1 1 2 \n", - "2 2.0 0.000000 1 1 2 \n", - "3 4.0 51020.310547 3 1 2 \n", - "4 6.0 51020.310547 3 1 2 \n", + " num_adults is_child num_children is_pension_age num_pension_age \\\n", + "0 1 0 0 1 1 \n", + "1 2 0 0 1 2 \n", + "2 2 0 0 1 2 \n", + "3 2 0 0 0 0 \n", + "4 2 0 0 0 0 \n", "\n", - " is_child num_children is_pension_age num_pension_age pwkstat_FT_hh \\\n", - "0 0 0 1 1 0 \n", - "1 0 0 1 2 0 \n", - "2 0 0 1 2 0 \n", - "3 0 0 0 0 2 \n", - "4 0 0 0 0 2 \n", + " pwkstat_FT_hh pwkstat_PT_hh pwkstat_NTS_match OA11CD \\\n", + "0 0 0 1 E00053954 \n", + "1 0 0 1 E00053953 \n", + "2 0 0 1 E00053953 \n", + "3 2 0 6 E00053689 \n", + "4 2 0 6 E00053689 \n", "\n", - " pwkstat_PT_hh pwkstat_NTS_match OA11CD RUC11 RUC11CD \n", - "0 0 1 E00053954 Urban city and town C1 \n", - "1 0 1 E00053953 Urban city and town C1 \n", - "2 0 1 E00053953 Urban city and town C1 \n", - "3 0 6 E00053689 Rural town and fringe D1 \n", - "4 0 6 E00053689 Rural town and fringe D1 " + " RUC11 RUC11CD \n", + "0 Urban city and town C1 \n", + "1 Urban city and town C1 \n", + "2 Urban city and town C1 \n", + "3 Rural town and fringe D1 \n", + "4 Rural town and fringe D1 " ] }, "execution_count": 23, @@ -1811,7 +1799,7 @@ "rural_urban = pd.read_csv('../data/external/census_2011_rural_urban.csv', sep=',')\n", "\n", "# merge the rural_urban data with the spc\n", - "spc_edited = spc_edited.merge(rural_urban[['OA11CD', 'RUC11', 'RUC11CD']], left_on='oa', right_on='OA11CD')\n", + "spc_edited = spc_edited.merge(rural_urban[['OA11CD', 'RUC11', 'RUC11CD']], left_on='oa11cd', right_on='OA11CD')\n", "spc_edited.head(5)\n", "\n" ] @@ -1897,10 +1885,9 @@ " \n", " id\n", " household\n", - " location\n", " pid_hs\n", - " msoa\n", - " oa\n", + " msoa11cd\n", + " oa11cd\n", " members\n", " sic1d2007\n", " sic2d2007\n", @@ -1943,7 +1930,6 @@ " 0\n", " 0\n", " 0\n", - " {'x': -1.7892179489135742, 'y': 
53.91915130615...\n", " 2905399\n", " E02002183\n", " E00053954\n", @@ -1987,7 +1973,6 @@ " 1\n", " 1\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2905308\n", " E02002183\n", " E00053953\n", @@ -2031,7 +2016,6 @@ " 2\n", " 2\n", " 1\n", - " {'x': -1.8262380361557007, 'y': 53.92028045654...\n", " 2907681\n", " E02002183\n", " E00053953\n", @@ -2051,7 +2035,7 @@ " 2\n", " 68\n", " 1\n", - " 2.0\n", + " 1.0\n", " 0.000000\n", " 1\n", " 1\n", @@ -2075,7 +2059,6 @@ " 3\n", " 3\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2902817\n", " E02002183\n", " E00053689\n", @@ -2119,7 +2102,6 @@ " 4\n", " 4\n", " 2\n", - " {'x': -1.8749940395355225, 'y': 53.94298934936...\n", " 2900884\n", " E02002183\n", " E00053689\n", @@ -2139,7 +2121,7 @@ " 2\n", " 26\n", " 1\n", - " 6.0\n", + " 4.0\n", " 51020.310547\n", " 3\n", " 1\n", @@ -2164,68 +2146,68 @@ "" ], "text/plain": [ - " id household location pid_hs \\\n", - "0 0 0 {'x': -1.7892179489135742, 'y': 53.91915130615... 2905399 \n", - "1 1 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2905308 \n", - "2 2 1 {'x': -1.8262380361557007, 'y': 53.92028045654... 2907681 \n", - "3 3 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 2902817 \n", - "4 4 2 {'x': -1.8749940395355225, 'y': 53.94298934936... 
2900884 \n", + " id household pid_hs msoa11cd oa11cd members sic1d2007 sic2d2007 \\\n", + "0 0 0 2905399 E02002183 E00053954 [0] J 58.0 \n", + "1 1 1 2905308 E02002183 E00053953 [1, 2] C 25.0 \n", + "2 2 1 2907681 E02002183 E00053953 [1, 2] P 85.0 \n", + "3 3 2 2902817 E02002183 E00053689 [3, 4] C 31.0 \n", + "4 4 2 2900884 E02002183 E00053689 [3, 4] J 62.0 \n", "\n", - " msoa oa members sic1d2007 sic2d2007 pwkstat salary_yearly \\\n", - "0 E02002183 E00053954 [0] J 58.0 6 NaN \n", - "1 E02002183 E00053953 [1, 2] C 25.0 6 NaN \n", - "2 E02002183 E00053953 [1, 2] P 85.0 6 NaN \n", - "3 E02002183 E00053689 [3, 4] C 31.0 1 32857.859375 \n", - "4 E02002183 E00053689 [3, 4] J 62.0 1 18162.451172 \n", + " pwkstat salary_yearly salary_hourly hid accommodation_type \\\n", + "0 6 NaN NaN E02002183_0001 1.0 \n", + "1 6 NaN NaN E02002183_0002 3.0 \n", + "2 6 NaN NaN E02002183_0002 3.0 \n", + "3 1 32857.859375 14.360952 E02002183_0003 3.0 \n", + "4 1 18162.451172 9.439944 E02002183_0003 3.0 \n", "\n", - " salary_hourly hid accommodation_type communal_type \\\n", - "0 NaN E02002183_0001 1.0 NaN \n", - "1 NaN E02002183_0002 3.0 NaN \n", - "2 NaN E02002183_0002 3.0 NaN \n", - "3 14.360952 E02002183_0003 3.0 NaN \n", - "4 9.439944 E02002183_0003 3.0 NaN \n", + " communal_type num_rooms central_heat tenure num_cars sex age_years \\\n", + "0 NaN 2.0 True 2.0 2 1 86 \n", + "1 NaN 6.0 True 2.0 2 1 74 \n", + "2 NaN 6.0 True 2.0 2 2 68 \n", + "3 NaN 6.0 True 2.0 1 1 27 \n", + "4 NaN 6.0 True 2.0 1 2 26 \n", "\n", - " num_rooms central_heat tenure num_cars sex age_years ethnicity \\\n", - "0 2.0 True 2.0 2 1 86 1 \n", - "1 6.0 True 2.0 2 1 74 3 \n", - "2 6.0 True 2.0 2 2 68 1 \n", - "3 6.0 True 2.0 1 1 27 1 \n", - "4 6.0 True 2.0 1 2 26 1 \n", + " ethnicity nssec8 salary_yearly_hh salary_yearly_hh_cat is_adult \\\n", + "0 1 1.0 0.000000 1 1 \n", + "1 3 1.0 0.000000 1 1 \n", + "2 1 1.0 0.000000 1 1 \n", + "3 1 4.0 51020.310547 3 1 \n", + "4 1 4.0 51020.310547 3 1 \n", "\n", - " nssec8 
salary_yearly_hh salary_yearly_hh_cat is_adult num_adults \\\n", - "0 1.0 0.000000 1 1 1 \n", - "1 1.0 0.000000 1 1 2 \n", - "2 2.0 0.000000 1 1 2 \n", - "3 4.0 51020.310547 3 1 2 \n", - "4 6.0 51020.310547 3 1 2 \n", + " num_adults is_child num_children is_pension_age num_pension_age \\\n", + "0 1 0 0 1 1 \n", + "1 2 0 0 1 2 \n", + "2 2 0 0 1 2 \n", + "3 2 0 0 0 0 \n", + "4 2 0 0 0 0 \n", "\n", - " is_child num_children is_pension_age num_pension_age pwkstat_FT_hh \\\n", - "0 0 0 1 1 0 \n", - "1 0 0 1 2 0 \n", - "2 0 0 1 2 0 \n", - "3 0 0 0 0 2 \n", - "4 0 0 0 0 2 \n", + " pwkstat_FT_hh pwkstat_PT_hh pwkstat_NTS_match OA11CD \\\n", + "0 0 0 1 E00053954 \n", + "1 0 0 1 E00053953 \n", + "2 0 0 1 E00053953 \n", + "3 2 0 6 E00053689 \n", + "4 2 0 6 E00053689 \n", "\n", - " pwkstat_PT_hh pwkstat_NTS_match OA11CD RUC11 RUC11CD \\\n", - "0 0 1 E00053954 Urban city and town C1 \n", - "1 0 1 E00053953 Urban city and town C1 \n", - "2 0 1 E00053953 Urban city and town C1 \n", - "3 0 6 E00053689 Rural town and fringe D1 \n", - "4 0 6 E00053689 Rural town and fringe D1 \n", + " RUC11 RUC11CD Settlement2011EW_B03ID_spc \\\n", + "0 Urban city and town C1 Urban \n", + "1 Urban city and town C1 Urban \n", + "2 Urban city and town C1 Urban \n", + "3 Rural town and fringe D1 Rural \n", + "4 Rural town and fringe D1 Rural \n", "\n", - " Settlement2011EW_B03ID_spc Settlement2011EW_B04ID_spc \\\n", - "0 Urban Urban City and Town \n", - "1 Urban Urban City and Town \n", - "2 Urban Urban City and Town \n", - "3 Rural Rural Town and Fringe \n", - "4 Rural Rural Town and Fringe \n", + " Settlement2011EW_B04ID_spc Settlement2011EW_B03ID_spc_CD \\\n", + "0 Urban City and Town 1 \n", + "1 Urban City and Town 1 \n", + "2 Urban City and Town 1 \n", + "3 Rural Town and Fringe 2 \n", + "4 Rural Town and Fringe 2 \n", "\n", - " Settlement2011EW_B03ID_spc_CD Settlement2011EW_B04ID_spc_CD \n", - "0 1 2 \n", - "1 1 2 \n", - "2 1 2 \n", - "3 2 3 \n", - "4 2 3 " + " Settlement2011EW_B04ID_spc_CD \n", 
+ "0 2 \n", + "1 2 \n", + "2 2 \n", + "3 3 \n", + "4 3 " ] }, "execution_count": 25, @@ -3693,6 +3675,9 @@ } ], "source": [ + "# Make plots path\n", + "os.makedirs(\"../data/interim/matching/plots/\", exist_ok=True)\n", + "\n", "# loop over all variables in matching_dfs_dict and save a plot for each\n", "for key in list(matching_dfs_dict.keys())[1:]: # skip 1st key (hid)\n", " x = (match_coverage_1[key]\n", @@ -3957,7 +3942,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/scripts/2.1_sandbox-match_households.py b/scripts/2.1_sandbox-match_households.py new file mode 100644 index 0000000..ed56621 --- /dev/null +++ b/scripts/2.1_sandbox-match_households.py @@ -0,0 +1,1297 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from acbm.preprocessing import ( + count_per_group, + match_coverage_col, + nts_filter_by_year, + num_adult_child_hh, + transform_by_group, + truncate_values, +) + +pd.set_option("display.max_columns", None) + + +# ## Step 1: Load in the datasets + +# ### SPC + +# useful variables +region = "west-yorkshire" + + +# Read in the spc data (parquet format) +spc = pd.read_parquet("../data/external/spc_output/" + region + "_people_hh.parquet") +spc.head() + + +# select columns +spc = spc[ + [ + "id", + "household", + "pid_hs", + "msoa11cd", + "oa11cd", + "members", + "sic1d2007", + "sic2d2007", + "pwkstat", + "salary_yearly", + "salary_hourly", + "hid", + "accommodation_type", + "communal_type", + "num_rooms", + "central_heat", + "tenure", + "num_cars", + "sex", + "age_years", + "ethnicity", + "nssec8", + ] +] + + +# temporary reduction of the dataset for quick analysis +spc = spc.head(50000) + + +# ### NTS +# +# The NTS is split up into multiple tables. 
We will load in the following tables:
+# - individuals
+# - households
+# - trips

+path_psu = "../data/external/nts/UKDA-5340-tab/tab/psu_eul_2002-2022.tab"
+psu = pd.read_csv(path_psu, sep="\t")
+
+
+# #### Individuals
+
+path_individuals = "../data/external/nts/UKDA-5340-tab/tab/individual_eul_2002-2022.tab"
+nts_individuals = pd.read_csv(
+    path_individuals,
+    sep="\t",
+    usecols=[
+        "IndividualID",
+        "HouseholdID",
+        "PSUID",
+        "Age_B01ID",
+        "Age_B04ID",
+        "Sex_B01ID",
+        "OfPenAge_B01ID",
+        "HRPRelation_B01ID",
+        "EdAttn1_B01ID",
+        "EdAttn2_B01ID",
+        "EdAttn3_B01ID",
+        "OwnCycle_B01ID",  # Owns a cycle
+        "DrivLic_B02ID",  # type of driving license
+        "CarAccess_B01ID",
+        "IndIncome2002_B02ID",
+        "IndWkGOR_B02ID",  # Region of usual place of work
+        "EcoStat_B02ID",  # Working status of individual
+        "EcoStat_B03ID",
+        "NSSec_B03ID",  # NSSEC high level breakdown
+        "SC_B01ID",  # Social class of individual
+        "Stat_B01ID",  # employee or self-employed
+        "WkMode_B01ID",  # Usual means of travel to work
+        "WkHome_B01ID",  # Work from home
+        "PossHom_B01ID",  # Is it possible to work from home?
+        "OftHome_B01ID",  # How often work from home
+        "TravSh_B01ID",  # Usual mode from main food shopping trip
+        "SchDly_B01ID",  # Daily school journey?
+        "SchTrav_B01ID",  # Usual mode of travel to school
+        "SchAcc_B01ID",  # Is school trip accompanied by an adult?
+        "FdShp_B01ID",  # How do you usually carry out main food shop (go to shop, online etc)
+    ],
+)
+
+
+# #### Households
+
+path_households = "../data/external/nts/UKDA-5340-tab/tab/household_eul_2002-2022.tab"
+nts_households = pd.read_csv(
+    path_households,
+    sep="\t",
+    usecols=[
+        "HouseholdID",
+        "PSUID",
+        "HHIncome2002_B02ID",
+        "AddressType_B01ID",  # type of house
+        "Ten1_B02ID",  # type of tenure
+        "HHoldNumAdults",  # total no. of adults in household
+        "HHoldNumChildren",  # total no. of children in household
+        "HHoldNumPeople",  # total no. of people in household
+        "NumLicHolders",  # total no.
of driving license holders in household
+        "HHoldEmploy_B01ID",  # number of employed in household
+        "NumBike",  # no. of bikes
+        "NumCar",  # no. of cars
+        "NumVanLorry",  # no. of vans or lorries
+        "NumMCycle",  # no. of motorcycles
+        "WalkBus_B01ID",  # walk time from house to nearest bus stop
+        "Getbus_B01ID",  # frequency of bus service
+        "WalkRail_B01ID",  # walk time from house to nearest rail station
+        "JTimeHosp_B01ID",  # journey time to nearest hospital
+        "DVShop_B01ID",  # person no. for main food shopper in hh
+        "Settlement2011EW_B03ID",  # ONS Urban/Rural: 2 categories
+        "Settlement2011EW_B04ID",  # ONS Urban/Rural: 3 categories
+        "HHoldOAClass2011_B03ID",  # Census 2011 OA Classification
+        "HRPWorkStat_B02ID",  # HH ref person working status
+        "HRPSEGWorkStat_B01ID",  # HH ref person socio economic group for active workers
+        "W0",  # Unweighted interview sample
+        "W1",  # Unweighted diary sample
+        "W2",  # Weighted diary sample
+        "W3",  # Weighted interview sample
+    ],
+)
+
+
+# #### Trips
+
+path_trips = "../data/external/nts/UKDA-5340-tab/tab/trip_eul_2002-2022.tab"
+nts_trips = pd.read_csv(
+    path_trips,
+    sep="\t",
+    usecols=[
+        "TripID",
+        "DayID",
+        "IndividualID",
+        "HouseholdID",
+        "PSUID",
+        "PersNo",
+        "TravDay",
+        "JourSeq",
+        "ShortWalkTrip_B01ID",
+        "NumStages",
+        "MainMode_B03ID",
+        "MainMode_B04ID",
+        "TripPurpFrom_B01ID",
+        "TripPurpTo_B01ID",
+        "TripPurpose_B04ID",
+        "TripStart",
+        "TripEnd",
+        "TripTotalTime",
+        "TripTravTime",
+        "TripDisIncSW",
+        "TripDisExSW",
+        "TripOrigGOR_B02ID",
+        "TripDestGOR_B02ID",
+        "W5",
+        "W5xHH",
+    ],
+)
+
+
+# #### Filter by year
+#
+# We will filter the NTS data to only include data from specific years. 
We can choose only 1 year, or multiple years to increase our sample size and the likelihood of a match with the spc + +years = [2019, 2021, 2022] + +nts_individuals = nts_filter_by_year(nts_individuals, psu, years) +nts_households = nts_filter_by_year(nts_households, psu, years) +nts_trips = nts_filter_by_year(nts_trips, psu, years) + + +# #### Filter by geography +# +# I will not do this for categorical matching, as it reduces the sample significantly, and leads to more spc households not being matched + +# regions = ['Yorkshire and the Humber', 'North West'] + +# nts_individuals = nts_filter_by_region(nts_individuals, psu, regions) +# nts_households = nts_filter_by_region(nts_households, psu, regions) +# nts_trips = nts_filter_by_region(nts_trips, psu, regions) + + +# Create dictionaries of key value pairs + +""" +guide to the dictionaries: + +_nts_hh: from NTS households table +_nts_ind: from NTS individuals table +_spc: from SPC + +""" + + +# ---------- NTS + +# Create a dictionary for the HHIncome2002_B02ID column +income_dict_nts_hh = { + "1": "0-25k", + "2": "25k-50k", + "3": "50k+", + "-8": "NA", + # should be -10, but + # it could be a typo in household_eul_2002-2022_ukda_data_dictionary + "-1": "DEAD", +} + +# Create a dictionary for the HHoldEmploy_B01ID column +# (PT: Part time, FT: Full time) +employment_dict_nts_hh = { + "1": "None", + "2": "0 FT, 1 PT", + "3": "1 FT, 0 PT", + "4": "0 FT, 2 PT", + "5": "1 FT, 1 PT", + "6": "2 FT, 0 PT", + "7": "1 FT, 2+ PT", + "8": "2 FT, 1+ PT", + "9": "0 FT, 3+ PT", + "10": "3+ FT, 0 PT", + "11": "3+ FT, 1+ PT", + "-8": "NA", + "-10": "DEAD", +} + +# Create a dictionary for the Ten1_B02ID column +tenure_dict_nts_hh = { + "1": "Owns / buying", + "2": "Rents", + "3": "Other (including rent free)", + "-8": "NA", + "-9": "DNA", + "-10": "DEAD", +} + + +# ---------- SPC + + +# create a dictionary for the pwkstat column +employment_dict_spc = { + "0": "Not applicable (age < 16)", + "1": "Employee FT", + "2": "Employee 
PT", + "3": "Employee unspecified", + "4": "Self-employed", + "5": "Unemployed", + "6": "Retired", + "7": "Homemaker/Maternal leave", + "8": "Student", + "9": "Long term sickness/disability", + "10": "Other", +} + + +# Create a dictionary for the tenure column +tenure_dict_spc = { + "1": "Owned: Owned outright", + "2": "Owned: Owned with a mortgage or loan or shared ownership", + "3": "Rented or living rent free: Total", + "4": "Rented: Social rented", + "5": "Rented: Private rented or living rent free", + "-8": "NA", + "-9": "DNA", + "-10": "DEAD", +} + + +# Combine the dictionaries into a dictionary of dictionaries + +dict_nts = { + "HHIncome2002_B02ID": income_dict_nts_hh, + "HHoldEmploy_B01ID": employment_dict_nts_hh, + "Ten1_B02ID": tenure_dict_nts_hh, +} + +dict_spc = {"pwkstat": employment_dict_spc, "tenure": tenure_dict_spc} + + +# ## Step 2: Decide on matching variables +# +# We need to identify the socio-demographic characteristics that we will match on. The schema for the synthetic population can be found [here](https://github.com/alan-turing-institute/uatk-spc/blob/main/synthpop.proto). +# +# Matching between the SPC and the NTS will happen in two steps: +# +# 1. Match at the household level +# 2. Match individuals within the household +# +# ### Household level matching +# +# | Variable | Name (NTS) | Name (SPC) | Transformation (NTS) | Transformation (SPC) | +# | ------------------ | -------------------- | --------------- | -------------------- | -------------------- | +# | Household income | `HHIncome2002_BO2ID` | `salary_yearly` | NA | Group by household ID and sum | +# | Number of adults | `HHoldNumAdults` | `age_years` | NA | Group by household ID and count | +# | Number of children | `HHoldNumChildren` | `age_years` | NA | Group by household ID and count | +# | Employment status | `HHoldEmploy_B01ID` | `pwkstat` | NA | a) match to NTS categories. b) group by household ID | +# | Car ownership | `NumCar` | `num_cars` | SPC is capped at 2. 
We change all entries > 2 to 2 | NA | +# +# Other columns to match in the future +# | Variable | Name (NTS) | Name (SPC) | Transformation (NTS) | Transformation (SPC) | +# | ------------------ | -------------------- | --------------- | -------------------- | -------------------- | +# | Type of tenancy | `Ten1_B02ID` | `tenure` | ?? | ?? | +# | Urban-Rural classification of residence | `Settlement2011EW_B04ID` | NA | NA | Spatial join between [layer](https://www.gov.uk/government/collections/rural-urban-classification) and SPC | +# +# + +# ### 2.1 Edit SPC columns + +# #### Household Income +# +# Edit the spc so that we have household income as well as individual income. + +# add household income column for SPC +spc_edited = transform_by_group( + data=spc, + group_col="household", + transform_col="salary_yearly", + new_col="salary_yearly_hh", + transformation_type="sum", +) + + +# Check number of individuals and households with reported salaries + +# histogram for individuals and households (include NAs as 0) +fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) +ax[0].hist(spc_edited["salary_yearly"].fillna(0), bins=30) +ax[0].set_title("Salary yearly (Individuals)") +ax[0].set_xlabel("Salary yearly") +ax[0].set_ylabel("Frequency") +ax[1].hist(spc_edited["salary_yearly_hh"].fillna(0), bins=30) +ax[1].set_title("Salary yearly (Households)") +ax[1].set_xlabel("Salary yearly") +plt.show() + + +# statistics + +# print the total number of rows in the spc. 
Add a message "Values =" +print("Individuals in SPC =", spc_edited.shape[0]) +# number of individuals without reported income +print("Individuals without reported income =", spc_edited["salary_yearly"].isna().sum()) +# % of individuals with reported income (salary_yearly not equal NA) +print( + "% of individuals with reported income =", + round((spc_edited["salary_yearly"].count() / spc_edited.shape[0]) * 100, 1), +) +print( + "Individuals with reported income: 0 =", + spc_edited[spc_edited["salary_yearly"] == 0].shape[0], +) + + +# print the total number of households +print("Households in SPC =", spc_edited["household"].nunique()) +# number of households without reported income (salary yearly_hh = 0) +print( + "Households without reported income =", + spc_edited[spc_edited["salary_yearly_hh"] == 0].shape[0], +) +# # % of households with reported income (salary_yearly not equal NA) +print( + "% of households with reported income =", + round( + ( + spc_edited[spc_edited["salary_yearly_hh"] == 0].shape[0] + / spc_edited["household"].nunique() + ) + * 100, + 1, + ), +) +print( + "Households with reported income: 0 =", + spc_edited[spc_edited["salary_yearly_hh"] == 0].shape[0], +) + + +# --- Recode column so that it matches the reported NTS values (Use income_dict_nts_hh dictionary for reference) + +# Define the bins (first ) +bins = [0, 24999, 49999, np.inf] +# Define the labels for the bins +labels = [1, 2, 3] + +spc_edited = spc_edited.copy() + +spc_edited["salary_yearly_hh_cat"] = ( + pd.cut( + spc_edited["salary_yearly_hh"], bins=bins, labels=labels, include_lowest=True + ) + .astype("str") + .astype("float") +) + + +# replace NA values with -8 (to be consistent with NTS) +spc_edited["salary_yearly_hh_cat"] = spc_edited["salary_yearly_hh_cat"].fillna(-8) + +# Convert the column to int +spc_edited["salary_yearly_hh_cat"] = spc_edited["salary_yearly_hh_cat"].astype("int") + + +# If we compare household income from the SPC and the NTS, we find that the SPC has many 
more households with no reported income (-8). This will create an issue when matching using household income + +# bar plot showing spc_edited.salary_yearly_hh_cat and nts_households.HHIncome2002_B02ID side by side +fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) +ax[0].bar( + spc_edited["salary_yearly_hh_cat"].value_counts().index, + spc_edited["salary_yearly_hh_cat"].value_counts().values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Income Bracket - Household level") +ax[0].set_ylabel("No of Households") +ax[1].bar( + nts_households["HHIncome2002_B02ID"].value_counts().index, + nts_households["HHIncome2002_B02ID"].value_counts().values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Income Bracket - Household level") +plt.show() + +# same as above but (%) +fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) +ax[0].bar( + spc_edited["salary_yearly_hh_cat"].value_counts(normalize=True).index, + spc_edited["salary_yearly_hh_cat"].value_counts(normalize=True).values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Income Bracket - Household level") +ax[0].set_ylabel("Fraction of Households") +ax[1].bar( + nts_households["HHIncome2002_B02ID"].value_counts(normalize=True).index, + nts_households["HHIncome2002_B02ID"].value_counts(normalize=True).values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Income Bracket - Household level") +plt.show() + + +# get the % of households in each income bracket for the nts +nts_households["HHIncome2002_B02ID"].value_counts(normalize=True) * 100 + + +# #### Household Composition (No. of Adults / Children) + +# Number of adults and children in the household + +spc_edited = num_adult_child_hh( + data=spc_edited, group_col="household", age_col="age_years" +) + + +# #### Employment Status + +# Employment status + +# check the colums values from our dictionary +dict_spc["pwkstat"], dict_nts["HHoldEmploy_B01ID"] + + +# The NTS only reports the number of Full time and Part time employees for each household. 
For the SPC we also need to get the number of full time and part time workers for each household. +# +# Step 1: Create a column for Full time and a column for Part time + +# We will only use '1' and '2' for the employment status + +counts_df = count_per_group( + df=spc_edited, + group_col="household", + count_col="pwkstat", + values=[1, 2], + value_names=["pwkstat_FT_hh", "pwkstat_PT_hh"], +) + +counts_df.head(10) + + +# Create a column that matches the NTS categories (m FT, n PT) + +# We want to match the SPC values to the NTS +dict_nts["HHoldEmploy_B01ID"] +""" +{ + '1': 'None', + '2': '0 FT, 1 PT', + '3': '1 FT, 0 PT', + '4': '0 FT, 2 PT', + '5': '1 FT, 1 PT', + '6': '2 FT, 0 PT', + '7': '1 FT, 2+ PT', + '8': '2 FT, 1+ PT', + '9': '0 FT, 3+ PT', + '10': '3+ FT, 0 PT', + '11': '3+ FT, 1+ PT', + '-8': 'NA', + '-10': 'DEAD'} + """ + +# 1) Match each row to the NTS + +# Define the conditions and outputs. +# We are using the keys in dict_nts['HHoldEmploy_B01ID'] as reference +conditions = [ + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] == 1), + (counts_df["pwkstat_FT_hh"] == 1) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] == 2), + (counts_df["pwkstat_FT_hh"] == 1) & (counts_df["pwkstat_PT_hh"] == 1), + (counts_df["pwkstat_FT_hh"] == 2) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] == 1) & (counts_df["pwkstat_PT_hh"] >= 2), + (counts_df["pwkstat_FT_hh"] == 2) & (counts_df["pwkstat_PT_hh"] >= 1), + (counts_df["pwkstat_FT_hh"] == 0) & (counts_df["pwkstat_PT_hh"] >= 3), + (counts_df["pwkstat_FT_hh"] >= 3) & (counts_df["pwkstat_PT_hh"] == 0), + (counts_df["pwkstat_FT_hh"] >= 3) & (counts_df["pwkstat_PT_hh"] >= 1), +] + +# Define the corresponding outputs based on dict_nts['HHoldEmploy_B01ID] +outputs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + +# Create a new column using np.select 
+counts_df["pwkstat_NTS_match"] = np.select(conditions, outputs, default=-8) + + +# 2) merge back onto the spc +spc_edited = spc_edited.merge(counts_df, left_on="household", right_index=True) + +# check the output +spc_edited[ + ["household", "pwkstat", "pwkstat_FT_hh", "pwkstat_PT_hh", "pwkstat_NTS_match"] +].head(10) + + +# bar plot of counts_df['pwkstat_NTS_match'] and nts_households['HHoldEmploy_B01ID'] +fig, ax = plt.subplots(1, 2, figsize=(12, 6)) +ax[0].bar( + counts_df["pwkstat_NTS_match"].value_counts().index, + counts_df["pwkstat_NTS_match"].value_counts().values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Employment status - Household level") +ax[0].set_ylabel("Frequency") +ax[1].bar( + nts_households["HHoldEmploy_B01ID"].value_counts().index, + nts_households["HHoldEmploy_B01ID"].value_counts().values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Employment status - Household level") +plt.show() + +# same as above but percentages +fig, ax = plt.subplots(1, 2, figsize=(12, 6)) +ax[0].bar( + counts_df["pwkstat_NTS_match"].value_counts().index, + counts_df["pwkstat_NTS_match"].value_counts(normalize=True).values, +) +ax[0].set_title("SPC") +ax[0].set_xlabel("Employment status - Household level") +ax[0].set_ylabel("Frequency (normalized)") +ax[1].bar( + nts_households["HHoldEmploy_B01ID"].value_counts().index, + nts_households["HHoldEmploy_B01ID"].value_counts(normalize=True).values, +) +ax[1].set_title("NTS") +ax[1].set_xlabel("Employment status - Household level") +plt.show() + + +# #### Urban Rural Classification +# +# We use the 2011 rural urban classification to match the SPC to the NTS. The NTS has 2 columns that we can use to match to the SPC: `Settlement2011EW_B03ID` and `Settlement2011EW_B04ID`. The `Settlement2011EW_B03ID` column is more general (urban / rural only), while the `Settlement2011EW_B04ID` column is more specific. We stick to the more general column for now. 
+ +# read the rural urban classification data +rural_urban = pd.read_csv("../data/external/census_2011_rural_urban.csv", sep=",") + +# merge the rural_urban data with the spc +spc_edited = spc_edited.merge( + rural_urban[["OA11CD", "RUC11", "RUC11CD"]], left_on="oa11cd", right_on="OA11CD" +) +spc_edited.head(5) + + +# create dictionary from the NTS `Settlement2011EW_B03ID` column +Settlement2011EW_B03ID_nts_hh = { + "1": "Urban", + "2": "Rural", + "3": "Scotland", + "-8": "NA", + "-10": "DEAD", +} + +Settlement2011EW_B04ID_nts_hh = { + "1": "Urban Conurbation", + "2": "Urban City and Town", + "3": "Rural Town and Fringe", + "4": "Rural Village, Hamlet and Isolated Dwellings", + "5": "Scotland", + "-8": "NA", + "-10": "DEAD", +} + + +census_2011_to_nts_B03ID = { + "Urban major conurbation": "Urban", + "Urban minor conurbation": "Urban", + "Urban city and town": "Urban", + "Urban city and town in a sparse setting": "Urban", + "Rural town and fringe": "Rural", + "Rural town and fringe in a sparse setting": "Rural", + "Rural village": "Rural", + "Rural village in a sparse setting": "Rural", + "Rural hamlets and isolated dwellings": "Rural", + "Rural hamlets and isolated dwellings in a sparse setting": "Rural", +} + +census_2011_to_nts_B04ID = { + "Urban major conurbation": "Urban Conurbation", + "Urban minor conurbation": "Urban Conurbation", + "Urban city and town": "Urban City and Town", + "Urban city and town in a sparse setting": "Urban City and Town", + "Rural town and fringe": "Rural Town and Fringe", + "Rural town and fringe in a sparse setting": "Rural Town and Fringe", + "Rural village": "Rural Village, Hamlet and Isolated Dwellings", + "Rural village in a sparse setting": "Rural Village, Hamlet and Isolated Dwellings", + "Rural hamlets and isolated dwellings": "Rural Village, Hamlet and Isolated Dwellings", + "Rural hamlets and isolated dwellings in a sparse setting": "Rural Village, Hamlet and Isolated Dwellings", +} + + +# add the nts Settlement2011EW_B03ID 
and Settlement2011EW_B04ID columns to the spc +spc_edited["Settlement2011EW_B03ID_spc"] = spc_edited["RUC11"].map( + census_2011_to_nts_B03ID +) +spc_edited["Settlement2011EW_B04ID_spc"] = spc_edited["RUC11"].map( + census_2011_to_nts_B04ID +) +spc_edited.head() + +# add the keys from nts_Settlement2011EW_B03ID and nts_Settlement2011EW_B04ID to the spc based on above mappings + +# reverse the dictionaries +Settlement2011EW_B03ID_nts_rev = { + v: k for k, v in Settlement2011EW_B03ID_nts_hh.items() +} +# map the values +spc_edited["Settlement2011EW_B03ID_spc_CD"] = ( + spc_edited["Settlement2011EW_B03ID_spc"] + .map(Settlement2011EW_B03ID_nts_rev) + .astype("int") +) + +Settlement2011EW_B04ID_nts_rev = { + v: k for k, v in Settlement2011EW_B04ID_nts_hh.items() +} +spc_edited["Settlement2011EW_B04ID_spc_CD"] = ( + spc_edited["Settlement2011EW_B04ID_spc"] + .map(Settlement2011EW_B04ID_nts_rev) + .astype("int") +) +spc_edited.head() + + +# ### 2.2 Edit NTS columns + +# #### Number of people of pension age + +nts_pensioners = count_per_group( + df=nts_individuals, + group_col="HouseholdID", + count_col="OfPenAge_B01ID", + values=[1], + value_names=["num_pension_age_nts"], +) + +nts_pensioners.head() + +# join onto the nts household df +nts_households = nts_households.merge( + nts_pensioners, left_on="HouseholdID", right_index=True, how="left" +) + + +# #### Number of cars +# +# - `SPC.num_cars` only has values [0, 1, 2]. 2 is for all households with 2 or more cars +# - `NTS.NumCar` is more detailed. It has the actual value of the number of cars. We will cap this at 2. + +# Create a new column in NTS +nts_households.loc[:, "NumCar_SPC_match"] = nts_households["NumCar"].apply( + truncate_values, upper=2 +) + +nts_households[["NumCar", "NumCar_SPC_match"]].head(20) + + +# #### Type of tenancy +# +# Breakdown between NTS and SPC is different. 
+ +dict_nts["Ten1_B02ID"], dict_spc["tenure"] + + +# Create dictionaries to map tenure onto the spc and nts dfs + +# Dictionary showing how we want the final columns to look like +tenure_dict_nts_spc = { + 1: "Owned", + 2: "Rented or rent free", + -8: "NA", + -9: "DNA", + -10: "DEAD", +} + +# Matching NTS to tenure_dict_nts_spc + +# Create a new dictionary for matching +matching_dict_nts_tenure = {1: 1, 2: 2, 3: 2} + +matching_dict_spc_tenure = { + 1: 1, #'Owned: Owned outright' : 'Owned' + 2: 1, #'Owned: Owned with a mortgage or loan or shared ownership', : 'Owned' + 3: 2, #'Rented or living rent free: Total', : 'Rented or rent free' + 4: 2, #'Rented: Social rented', : 'Rented or rent free' + 5: 2, #'Rented: Private rented or living rent free', : 'Rented or rent free' +} + + +# map dictionaries to create comparable columns + +# Create a new column in nts_households +nts_households["tenure_nts_for_matching"] = ( + nts_households["Ten1_B02ID"] + .map(matching_dict_nts_tenure) # map the values to the new dictionary + .fillna(nts_households["Ten1_B02ID"]) +) # fill the NaNs with the original values + +# Create a new column in spc +spc_edited["tenure_spc_for_matching"] = ( + spc_edited["tenure"] + .map(matching_dict_spc_tenure) # map the values to the new dictionary + .fillna(spc_edited["tenure"]) +) # fill the NaNs with the original values + + +# ## Step 3: Matching at Household Level +# +# Now that we've prepared all the columns, we can start matching. 
+ +# ### 3.1 Categorical matching +# +# We will match on (a subset of) the following columns: +# +# | Matching variable | NTS column | SPC column | +# | ------------------| ---------- | ---------- | +# | Household income | `HHIncome2002_BO2ID` | `salary_yearly_hh_cat` | +# | Number of adults | `HHoldNumAdults` | `num_adults` | +# | Number of children | `HHoldNumChildren` | `num_children` | +# | Employment status | `HHoldEmploy_B01ID` | `pwkstat_NTS_match` | +# | Car ownership | `NumCar_SPC_match` | `num_cars` | +# | Type of tenancy | `tenure_nts_for_matching` | `tenure_spc_for_matching` | +# | Rural/Urban Classification | `Settlement2011EW_B03ID` | `Settlement2011EW_B03ID_spc_CD` | + +# Prepare SPC df for matching + +# Select multiple columns +spc_matching = spc_edited[ + [ + "hid", + "salary_yearly_hh_cat", + "num_adults", + "num_children", + "num_pension_age", + "pwkstat_NTS_match", + "num_cars", + "tenure_spc_for_matching", + "Settlement2011EW_B03ID_spc_CD", + "Settlement2011EW_B04ID_spc_CD", + ] +] + +# edit the df so that we have one row per hid +spc_matching = spc_matching.drop_duplicates(subset="hid") + +spc_matching.head(10) + + +# Prepare NTS df for matching + +nts_matching = nts_households[ + [ + "HouseholdID", + "HHIncome2002_B02ID", + "HHoldNumAdults", + "HHoldNumChildren", + "num_pension_age_nts", + "HHoldEmploy_B01ID", + "NumCar_SPC_match", + "tenure_nts_for_matching", + "Settlement2011EW_B03ID", + "Settlement2011EW_B04ID", + ] +] + +nts_matching.head(10) + + +# Dictionary of matching columns. 
We extract column names from this dictioary when matching on a subset of the columns + +# column_names (keys) for the dictionary +matching_ids = [ + "household_id", + "yearly_income", + "number_adults", + "number_children", + "num_pension_age", + "employment_status", + "number_cars", + "tenure_status", + "rural_urban_2_categories", + "rural_urban_4_categories", +] + +# i want the value to be a list with spc_matching and nts_matching +matching_dfs_dict = { + column_name: [spc_value, nts_value] + for column_name, spc_value, nts_value in zip( + matching_ids, spc_matching, nts_matching + ) +} + + +# Attempt 1: Match on all possible columns + +# columns for matching +keys = [ + "yearly_income", + "number_adults", + "number_children", + "num_pension_age", + "employment_status", + "number_cars", + "tenure_status", + "rural_urban_2_categories", +] + + +spc_cols = [matching_dfs_dict[key][0] for key in keys] +nts_cols = [matching_dfs_dict[key][1] for key in keys] + +# match +spc_nts_1 = spc_matching.merge( + nts_matching, left_on=spc_cols, right_on=nts_cols, how="left" +) + +# Calculate how many rows from nts_matching are matched onto each hid in spc_matching, +spc_nts_1["count"] = spc_nts_1.groupby("hid")["HouseholdID"].transform("count") + +spc_nts_1_hist = spc_nts_1.drop_duplicates(subset="hid") + + +# plot a histogram of the counts and label the axis and title +plt.hist(spc_nts_1_hist["count"], bins=50) +plt.xlabel("Number of matches per household") +plt.ylabel("Number of households") +plt.title("Categorical Matching") + +print( + spc_nts_1_hist[spc_nts_1_hist["count"] == 0].shape[0], + "households in the SPC had no match", +) +print( + round( + ( + spc_nts_1_hist[spc_nts_1_hist["count"] == 0].shape[0] + / spc_matching["hid"].unique().shape[0] + ) + * 100, + 1, + ), + "% of households in the SPC had no match", +) + + +# calculate matching coverage for all columns + +match_coverage_1 = { + key: match_coverage_col( + data=spc_nts_1, id_x="hid", id_y="HouseholdID", 
column=matching_dfs_dict[key][0] + ) + for key in matching_dfs_dict +} + +# extract any df from the list +match_coverage_1["number_children"] + + +# Attempt 2: Match on a subset of columns (exclude salary) + +# columns for matching +keys = [ + "number_adults", + "number_children", + "num_pension_age", + "employment_status", + "number_cars", + "tenure_status", + "rural_urban_2_categories", +] +# extract equivalent column names from dictionary +spc_cols = [matching_dfs_dict[key][0] for key in keys] +nts_cols = [matching_dfs_dict[key][1] for key in keys] + +# match +spc_nts_2 = spc_matching.merge( + nts_matching, left_on=spc_cols, right_on=nts_cols, how="left" +) + +# Calculate how many rows from nts_matching are matched onto each hid in spc_matching, +spc_nts_2["count"] = spc_nts_2.groupby("hid")["HouseholdID"].transform("count") + +spc_nts_2_hist = spc_nts_2.drop_duplicates(subset="hid") + + +# plot a histogram of the counts and label the axis and title +plt.hist(spc_nts_2_hist["count"], bins=50) +plt.xlabel("Number of matches per household") +plt.ylabel("Number of households") +plt.title("Categorical Matching") + + +print( + spc_nts_2_hist[spc_nts_2_hist["count"] == 0].shape[0], + "households in the SPC had no match", +) +print( + round( + ( + spc_nts_2_hist[spc_nts_2_hist["count"] == 0].shape[0] + / spc_matching["hid"].unique().shape[0] + ) + * 100, + 1, + ), + "% of households in the SPC had no match", +) + + +# calculate matching coverage for all columns + +match_coverage_2 = { + key: match_coverage_col( + data=spc_nts_2, id_x="hid", id_y="HouseholdID", column=matching_dfs_dict[key][0] + ) + for key in matching_dfs_dict +} + +# extract any df from the list +# match_coverage_2['number_cars'] + + +# Attempt 3: Match on a subset of columns (exclude salary and tenure) + +# columns for matching +keys = [ + "number_adults", + "number_children", + "num_pension_age", + "employment_status", + "number_cars", + "rural_urban_2_categories", +] +# extract equivalent column 
names from dictionary +spc_cols = [matching_dfs_dict[key][0] for key in keys] +nts_cols = [matching_dfs_dict[key][1] for key in keys] + +# match +spc_nts_3 = spc_matching.merge( + nts_matching, left_on=spc_cols, right_on=nts_cols, how="left" +) + +# Calculate how many rows from nts_matching are matched onto each hid in spc_matching, +spc_nts_3["count"] = spc_nts_3.groupby("hid")["HouseholdID"].transform("count") + +spc_nts_3_hist = spc_nts_3.drop_duplicates(subset="hid") + + +# plot a histogram of the counts and label the axis and title +plt.hist(spc_nts_3_hist["count"], bins=50) +plt.xlabel("Number of matches per household") +plt.ylabel("Number of households") +plt.title("Categorical Matching") + + +print( + spc_nts_3_hist[spc_nts_3_hist["count"] == 0].shape[0], + "households in the SPC had no match", +) +print( + round( + ( + spc_nts_3_hist[spc_nts_3_hist["count"] == 0].shape[0] + / spc_matching["hid"].unique().shape[0] + ) + * 100, + 1, + ), + "% of households in the SPC had no match", +) + + +# calculate matching coverage for all columns + +match_coverage_3 = { + key: match_coverage_col( + data=spc_nts_3, id_x="hid", id_y="HouseholdID", column=matching_dfs_dict[key][0] + ) + for key in matching_dfs_dict +} + +# extract any df from the list +# match_coverage_2['number_cars'] + + +# Attempt 4: Match on a subset of columns (exclude salary, tenure, and employment status) + +# columns for matching +keys = [ + "number_adults", + "number_children", + "num_pension_age", + "number_cars", + "rural_urban_2_categories", +] +# extract equivalent column names from dictionary +spc_cols = [matching_dfs_dict[key][0] for key in keys] +nts_cols = [matching_dfs_dict[key][1] for key in keys] + +# matc +spc_nts_4 = spc_matching.merge( + nts_matching, left_on=spc_cols, right_on=nts_cols, how="left" +) + +# Calculate how many rows from nts_matching are matched onto each hid in spc_matching, +spc_nts_4["count"] = spc_nts_4.groupby("hid")["HouseholdID"].transform("count") + 
+spc_nts_4_hist = spc_nts_4.drop_duplicates(subset="hid") + + +# plot a histogram of the counts and label the axis and title +plt.hist(spc_nts_4_hist["count"], bins=50) +plt.xlabel("Number of matches per household") +plt.ylabel("Number of households") +plt.title("Categorical Matching") + + +print( + spc_nts_4_hist[spc_nts_4_hist["count"] == 0].shape[0], + "households in the SPC had no match", +) +print( + round( + ( + spc_nts_4_hist[spc_nts_4_hist["count"] == 0].shape[0] + / spc_matching["hid"].unique().shape[0] + ) + * 100, + 1, + ), + "% of households in the SPC had no match", +) + + +# calculate matching coverage for all columns + +match_coverage_4 = { + key: match_coverage_col( + data=spc_nts_4, id_x="hid", id_y="HouseholdID", column=matching_dfs_dict[key][0] + ) + for key in matching_dfs_dict +} + +# extract any df from the list +# match_coverage_2['number_cars'] + + +# Removing salary has a significant impact on matching + +print(spc_matching["hid"].nunique(), "Total households in SPC") + +# Attempt 1 +print( + spc_nts_1_hist[spc_nts_1_hist["count"] == 0].shape[0], + "Unmatched households - matching on all categories", +) +# Attempt 2 +print( + spc_nts_2_hist[spc_nts_2_hist["count"] == 0].shape[0], + "Unmatched households - exclusing Salary from matching", +) +# Attempt 3 +print( + spc_nts_3_hist[spc_nts_3_hist["count"] == 0].shape[0], + "Unmatched households - exclusing Salary and Tenure from matching", +) +# Attempt 4 +print( + spc_nts_4_hist[spc_nts_4_hist["count"] == 0].shape[0], + "Unmatched households - exclusing Salary, Tenure and Employment status from matching", +) + + +# Plot matching coverage for each attempt + variable (key) combination +# +# This will show us, for each matching key, the % of spc households from each unique category that were matched to the NTS + +# Make plots path +os.makedirs("../data/interim/matching/plots/", exist_ok=True) + +# loop over all variables in matching_dfs_dict and save a plot for each +for key in 
list(matching_dfs_dict.keys())[1:]: # skip 1st key (hid) + x = ( + match_coverage_1[key] + .merge( + match_coverage_2[key], on=matching_dfs_dict[key][0], suffixes=("_1", "_2") + ) + .merge( + match_coverage_3[key], on=matching_dfs_dict[key][0], suffixes=("_2", "_3") + ) + .merge( + match_coverage_4[key], on=matching_dfs_dict[key][0], suffixes=("_3", "_4") + ) + ) + # keep % columns only + x = x[[col for col in x.columns if "Percentage" in col]] + # plot bar chart of Percentage of households matched for each category + fig, ax = plt.subplots(1, 1, figsize=(12, 6)) + x.plot(kind="bar", ax=ax) + plt.ylabel("% of households matched") + plt.title("Matching coverage for " + key) + plt.show() + # save the plot + fig.savefig(f"../data/interim/matching/plots/matching_coverage_hh_{key}.png") + + +# Plot matching coverage for each attempt + variable (key) combination +# +# This will show us, for each matching key, the % of spc households from each unique category that were matched to the NTS + +# loop over all variables in matching_dfs_dict and save a plot for each +for key in list(matching_dfs_dict.keys())[1:]: # skip 1st key (hid) + x = ( + match_coverage_1[key] + .merge( + match_coverage_2[key], on=matching_dfs_dict[key][0], suffixes=("_1", "_2") + ) + .merge( + match_coverage_3[key], on=matching_dfs_dict[key][0], suffixes=("_2", "_3") + ) + .merge( + match_coverage_4[key], on=matching_dfs_dict[key][0], suffixes=("_3", "_4") + ) + ) + # keep % columns only + x = x[[col for col in x.columns if "Percentage" in col]] + # plot bar chart of Percentage of households matched for each category + fig, ax = plt.subplots(1, 1, figsize=(12, 6)) + x.plot(kind="bar", ax=ax) + plt.ylabel("% of households matched") + plt.title("Matching coverage for " + key) + plt.show() + # save the plot + fig.savefig(f"../data/interim/matching/plots/matching_coverage_hh_{key}.png") + + +# #### Treat different households differently +# +# Salary is a useful matching variable, so it's a shame not to use 
it all. We can try to: +# - match on salary for households with 0 pensioners +# - match without salary for households with one or more pensioners + +# match on different subset of column depending on yearly_income value +keys = [ + "yearly_income", + "number_adults", + "number_children", + "num_pension_age", + "employment_status", + "number_cars", + "tenure_status", + "rural_urban_2_categories", +] +# remove yearly income from the list +# new list without yearly income, without modifying the original list +keys_no_salary = keys.copy() +keys_no_salary.remove("yearly_income") + + +#### ------ Split the two datasets into households with no salary and households with a salary + +# get spc column name that matches yearly_income in matching_dfs_dict +spc_col = matching_dfs_dict["num_pension_age"][0] +nts_col = matching_dfs_dict["num_pension_age"][1] + +# dfs: households with no salary +spc_matching_no_salary = spc_matching[spc_matching[spc_col] > 0] +nts_matching_no_salary = nts_matching[nts_matching[nts_col] > 0] + +# dfs: households with a salary +spc_matching_salary = spc_matching[spc_matching[spc_col] != 0] +nts_matching_salary = nts_matching[nts_matching[nts_col] != 0] + + +#### ------ Match the two datasets separately + +# extract equivalent column names from dictionary +spc_cols = [matching_dfs_dict[key][0] for key in keys] +nts_cols = [matching_dfs_dict[key][1] for key in keys] + +# extract equivalent column names from dictionary +spc_cols_no_salary = [matching_dfs_dict[key][0] for key in keys_no_salary] +nts_cols_no_salary = [matching_dfs_dict[key][1] for key in keys_no_salary] + +# match +spc_nts_no_salary = spc_matching_no_salary.merge( + nts_matching_no_salary, + left_on=spc_cols_no_salary, + right_on=nts_cols_no_salary, + how="left", +) + +spc_nts_salary = spc_matching_salary.merge( + nts_matching_salary, left_on=spc_cols, right_on=nts_cols, how="left" +) + +# bind the rows of the two dataframes +spc_nts_x = pd.concat([spc_nts_no_salary, spc_nts_salary]) + + 
+# Calculate how many rows from nts_matching are matched onto each hid in spc_matching, +spc_nts_x["count"] = spc_nts_x.groupby("hid")["HouseholdID"].transform("count") + +spc_nts_x_hist = spc_nts_x.drop_duplicates(subset="hid") + + +# plot a histogram of the counts and label the axis and title +plt.hist(spc_nts_x_hist["count"], bins=50) +plt.xlabel("Number of matches per household") +plt.ylabel("Number of households") +plt.title("Categorical Matching") + + +print( + spc_nts_x_hist[spc_nts_x_hist["count"] == 0].shape[0], + "households in the SPC had no match", +) +print( + round( + ( + spc_nts_x_hist[spc_nts_x_hist["count"] == 0].shape[0] + / spc_matching["hid"].unique().shape[0] + ) + * 100, + 1, + ), + "% of households in the SPC had no match", +) From 6565c731c15c6ac9086f79fe2fefcef196462a42 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 29 Apr 2024 11:59:21 +0100 Subject: [PATCH 08/16] Remove plt.show() from script --- scripts/2.1_sandbox-match_households.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/scripts/2.1_sandbox-match_households.py b/scripts/2.1_sandbox-match_households.py index ed56621..d6d8941 100644 --- a/scripts/2.1_sandbox-match_households.py +++ b/scripts/2.1_sandbox-match_households.py @@ -362,7 +362,6 @@ ax[1].hist(spc_edited["salary_yearly_hh"].fillna(0), bins=30) ax[1].set_title("Salary yearly (Households)") ax[1].set_xlabel("Salary yearly") -plt.show() # statistics @@ -449,7 +448,6 @@ ) ax[1].set_title("NTS") ax[1].set_xlabel("Income Bracket - Household level") -plt.show() # same as above but (%) fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True) @@ -466,7 +464,6 @@ ) ax[1].set_title("NTS") ax[1].set_xlabel("Income Bracket - Household level") -plt.show() # get the % of households in each income bracket for the nts @@ -577,7 +574,7 @@ ) ax[1].set_title("NTS") ax[1].set_xlabel("Employment status - Household level") -plt.show() + # same as above but percentages fig, ax = plt.subplots(1, 2, 
figsize=(12, 6)) @@ -594,7 +591,6 @@ ) ax[1].set_title("NTS") ax[1].set_xlabel("Employment status - Household level") -plt.show() # #### Urban Rural Classification @@ -1168,7 +1164,7 @@ x.plot(kind="bar", ax=ax) plt.ylabel("% of households matched") plt.title("Matching coverage for " + key) - plt.show() + # save the plot fig.savefig(f"../data/interim/matching/plots/matching_coverage_hh_{key}.png") @@ -1198,7 +1194,7 @@ x.plot(kind="bar", ax=ax) plt.ylabel("% of households matched") plt.title("Matching coverage for " + key) - plt.show() + # save the plot fig.savefig(f"../data/interim/matching/plots/matching_coverage_hh_{key}.png") From 841617f035ed8b45c2992610fc4298aff76330ff Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Mon, 29 Apr 2024 15:24:43 +0100 Subject: [PATCH 09/16] Add poetry lock --- poetry.lock | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5a2799e..c0a1786 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "appnope" @@ -1807,6 +1807,26 @@ files = [ {file = "tornado-6.4.tar.gz", hash = "sha256:72291fa6e6bc84e626589f1c29d90a5a6d593ef5ae68052ee2ef000dfd273dee"}, ] +[[package]] +name = "tqdm" +version = "4.66.2" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, + {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "traitlets" version = "5.14.2" @@ -1890,4 +1910,4 @@ test = ["pytest", "pytest-cov"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "aeac81dea1a17779ab21da9da54949996f4d0623aee12d807c3df20ab32acbd2" +content-hash = "c3ab3a63fee72a60d3726ba3ca220c34d74c856f40d82d677e6ed1661b0ddc68" From 77b660247255734d3d7543d845eb5b0e90e65888 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Tue, 30 Apr 2024 15:12:45 +0100 Subject: [PATCH 10/16] Remove CD workflow --- .github/workflows/cd.yml | 53 ---------------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 .github/workflows/cd.yml diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml deleted file mode 100644 index 18d6dad..0000000 --- a/.github/workflows/cd.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: CD - -on: - workflow_dispatch: - pull_request: - push: - branches: - - main - release: - types: - - published - -jobs: - dist: - needs: [pre-commit] - name: Distribution build - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Build sdist and wheel - run: pipx run 
build - - - uses: actions/upload-artifact@v4 - with: - path: dist - - - name: Check products - run: pipx run twine check dist/* - - publish: - needs: [dist] - name: Publish to PyPI - environment: pypi - permissions: - id-token: write - runs-on: ubuntu-latest - if: github.event_name == 'release' && github.event.action == 'published' - - steps: - - uses: actions/download-artifact@v4 - with: - name: artifact - path: dist - - - uses: pypa/gh-action-pypi-publish@release/v1 - if: github.event_name == 'release' && github.event.action == 'published' - with: - # Remove this line to publish to PyPI - repository-url: https://test.pypi.org/legacy/ From d2f9e747c3d55148316661b13b1650fac4a5a4ad Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Thu, 2 May 2024 21:35:31 +0100 Subject: [PATCH 11/16] Fix notebooks for pre-commit --- notebooks/1_prep_synthpop.ipynb | 3 --- notebooks/2_match_households_and_individuals.ipynb | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/notebooks/1_prep_synthpop.ipynb b/notebooks/1_prep_synthpop.ipynb index 241f07a..4809592 100644 --- a/notebooks/1_prep_synthpop.ipynb +++ b/notebooks/1_prep_synthpop.ipynb @@ -20,9 +20,6 @@ "metadata": {}, "outputs": [], "source": [ - "#import json\n", - "import pandas as pd\n", - "\n", "#https://github.com/alan-turing-institute/uatk-spc/blob/55-output-formats-python/python/examples/spc_builder_example.ipynb\n", "from uatk_spc.builder import Builder" ] diff --git a/notebooks/2_match_households_and_individuals.ipynb b/notebooks/2_match_households_and_individuals.ipynb index 5eb0f9b..d21fd39 100644 --- a/notebooks/2_match_households_and_individuals.ipynb +++ b/notebooks/2_match_households_and_individuals.ipynb @@ -6,14 +6,14 @@ "source": [ "# Adding activity chains to synthetic populations \n", "\n", - "The purpose of this script is to match each individual in the synthetic population to a respondant from the [National Travel Survey 
(NTS)](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=5340). \n", + "The purpose of this script is to match each individual in the synthetic population to a respondant from the [National Travel Survey (NTS)](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=5340).\n", "\n", "### Methods\n", "\n", "We will try two methods\n", "\n", "1. categorical matching: joining on relevant socio-demographic variables\n", - "2. statistical matching, as described in [An unconstrained statistical matching algorithm for combining individual and household level geo-specific census and survey data](https://doi.org/10.1016/j.compenvurbsys.2016.11.003). " + "2. statistical matching, as described in [An unconstrained statistical matching algorithm for combining individual and household level geo-specific census and survey data](https://doi.org/10.1016/j.compenvurbsys.2016.11.003)." ] }, { @@ -28,7 +28,7 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "from tqdm import tqdm, trange\n", + "from tqdm import trange\n", "\n", "# from tqdm.notebook import trange\n", "from acbm.matching import match_categorical, match_individuals\n", @@ -3664,10 +3664,10 @@ "\n", "'''\n", "- iterate over each key-value pair in the matches_hh_result dictionary.\n", - "- For each key-value pair, use np.random.choice(value) to randomly select \n", + "- For each key-value pair, use np.random.choice(value) to randomly select\n", "one item from the list of values associated with the current key.\n", - "- create a new dictionary hid_to_HouseholdID_sample where each key from the \n", - "original dictionary is associated with one randomly selected value from the \n", + "- create a new dictionary hid_to_HouseholdID_sample where each key from the\n", + "original dictionary is associated with one randomly selected value from the\n", "original list of values.\n", "\n", "'''\n", From 78f064720b9c66bc4cadbcd028155f834051e6c7 Mon Sep 17 00:00:00 2001 
From: Sam Greenbury Date: Thu, 2 May 2024 21:51:26 +0100 Subject: [PATCH 12/16] Fix preprocessing for pre-commit --- src/acbm/preprocessing.py | 41 ++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/src/acbm/preprocessing.py b/src/acbm/preprocessing.py index 77caceb..e707f5c 100644 --- a/src/acbm/preprocessing.py +++ b/src/acbm/preprocessing.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np import pandas as pd @@ -24,7 +26,7 @@ def nts_filter_by_year( if not set(years).issubset(unique_years): # If not, print the years that do exist and stop execution print( - f"At least one of the chosen year(s) do not exist in the PSU table. Years that exist in the PSU table are: {sorted(list(unique_years))}" + f"At least one of the chosen year(s) do not exist in the PSU table. Years that exist in the PSU table are: {sorted(unique_years)}" ) return None @@ -32,9 +34,7 @@ def nts_filter_by_year( psu_id_years = psu[psu["SurveyYear"].isin(years)]["PSUID"].unique() # Filter 'data' based on the chosen year - data_years = data[data["PSUID"].isin(psu_id_years)] - - return data_years + return data[data["PSUID"].isin(psu_id_years)] def nts_filter_by_region( @@ -80,7 +80,7 @@ def nts_filter_by_region( if not set(regions).issubset(unique_regions): # If not, print the years that do exist and stop execution print( - f"At least one of the chosen region(s) do not exist in the PSU table. Regions that exist in the PSU table are: {sorted(list(unique_regions))}" + f"At least one of the chosen region(s) do not exist in the PSU table. 
Regions that exist in the PSU table are: {sorted(unique_regions)}" ) return None @@ -89,9 +89,7 @@ def nts_filter_by_region( # Get the 'PSUID' values for the chosen year(s) psu_id_regions = psu[psu["region_name"].isin(regions)]["PSUID"].unique() # Filter 'data' based on the chosen year - data_regions = data[data["PSUID"].isin(psu_id_regions)] - - return data_regions + return data[data["PSUID"].isin(psu_id_regions)] def transform_by_group( @@ -122,8 +120,10 @@ def transform_by_group( try: data_copy[transform_col] = pd.to_numeric(data_copy[transform_col]) # if transformation fails, return the original data_copy - except: - print(f"The column '{transform_col}' could not be transformed to numeric") + except Exception as e: + print( + f"The column '{transform_col}' could not be transformed to numeric with exception: {e}" + ) return data_copy # Group the data by 'group_col' and apply the 'transformation_type' to the 'transform_col' for each group. # The result is stored in a new column called 'new_col' @@ -153,7 +153,7 @@ def num_adult_child_hh( data: pandas DataFrame The original dataframe with these new columns: is'adult', 'num_adults', 'is_child', 'num_children', 'is_pension_age', 'num_pension_age' """ - data = data.assign( + return data.assign( is_adult=(data[age_col] >= 16).astype(int), num_adults=lambda df: df.groupby(group_col)["is_adult"].transform("sum"), is_child=(data[age_col] < 16).astype(int), @@ -164,8 +164,6 @@ def num_adult_child_hh( ), ) - return data - def count_per_group( df: pd.DataFrame, group_col: str, count_col: str, values: list, value_names: list @@ -210,7 +208,9 @@ def count_per_group( return result -def truncate_values(x: int, lower: int = None, upper: int = None) -> int: +def truncate_values( + x: int, lower: Optional[int] = None, upper: Optional[int] = None +) -> int: """ Limit the value of x to the range [lower, upper] @@ -228,12 +228,10 @@ def truncate_values(x: int, lower: int = None, upper: int = None) -> int: int The value of x, limited 
to the range [lower, upper] """ - if upper is not None: - if x > upper: - return upper - if lower is not None: - if x < lower: - return lower + if upper is not None and x > upper: + return upper + if lower is not None and x < lower: + return lower return x @@ -292,9 +290,8 @@ def match_coverage_col( percentage_matched = round(matched / total * 100) # combined total, matched in one df - total_matched = pd.concat( + return pd.concat( [total, matched, percentage_matched], axis=1, keys=["Total", "Matched", "Percentage Matched"], ) - return total_matched From 1d3c1f5f611bc8aa9e0413694ae246ca77709b67 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Thu, 2 May 2024 22:01:57 +0100 Subject: [PATCH 13/16] Update lock and python versions in CI --- .github/workflows/ci.yml | 2 +- poetry.lock | 877 ++++++++++++++++++++------------------- 2 files changed, 443 insertions(+), 436 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 95ae343..5908888 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.10, 3.12] # test oldest and latest supported versions + python-version: ['3.10', '3.11', '3.12'] # test oldest and latest supported versions runs-on: [ubuntu-latest] # can be extended to other OSes, e.g. 
[ubuntu-latest, macos-latest] steps: diff --git a/poetry.lock b/poetry.lock index c0a1786..c87e52f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -264,63 +264,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.4.1" +version = "7.5.0" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7"}, - {file = "coverage-7.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25"}, - {file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c"}, - {file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b"}, - {file = "coverage-7.4.1-cp310-cp310-win32.whl", hash = "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016"}, - {file = "coverage-7.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018"}, - 
{file = "coverage-7.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295"}, - {file = "coverage-7.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd"}, - {file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1"}, - {file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6"}, - {file = "coverage-7.4.1-cp311-cp311-win32.whl", hash = "sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5"}, - {file = "coverage-7.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581"}, - {file = "coverage-7.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156"}, - {file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1"}, - {file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc"}, - {file = "coverage-7.4.1-cp312-cp312-win32.whl", hash = "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74"}, - {file = "coverage-7.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448"}, - {file = "coverage-7.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218"}, - {file = "coverage-7.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06"}, - {file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60"}, - {file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad"}, - {file = "coverage-7.4.1-cp38-cp38-win32.whl", hash = "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042"}, - {file = "coverage-7.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54"}, - {file = "coverage-7.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950"}, - {file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756"}, - {file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", 
hash = "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35"}, - {file = "coverage-7.4.1-cp39-cp39-win32.whl", hash = "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c"}, - {file = "coverage-7.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a"}, - {file = "coverage-7.4.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166"}, - {file = "coverage-7.4.1.tar.gz", hash = "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04"}, + {file = "coverage-7.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:432949a32c3e3f820af808db1833d6d1631664d53dd3ce487aa25d574e18ad1c"}, + {file = "coverage-7.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2bd7065249703cbeb6d4ce679c734bef0ee69baa7bff9724361ada04a15b7e3b"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbfe6389c5522b99768a93d89aca52ef92310a96b99782973b9d11e80511f932"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39793731182c4be939b4be0cdecde074b833f6171313cf53481f869937129ed3"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85a5dbe1ba1bf38d6c63b6d2c42132d45cbee6d9f0c51b52c59aa4afba057517"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:357754dcdfd811462a725e7501a9b4556388e8ecf66e79df6f4b988fa3d0b39a"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a81eb64feded34f40c8986869a2f764f0fe2db58c0530d3a4afbcde50f314880"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:51431d0abbed3a868e967f8257c5faf283d41ec882f58413cf295a389bb22e58"}, + {file = "coverage-7.5.0-cp310-cp310-win32.whl", hash = 
"sha256:f609ebcb0242d84b7adeee2b06c11a2ddaec5464d21888b2c8255f5fd6a98ae4"}, + {file = "coverage-7.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:6782cd6216fab5a83216cc39f13ebe30adfac2fa72688c5a4d8d180cd52e8f6a"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e768d870801f68c74c2b669fc909839660180c366501d4cc4b87efd6b0eee375"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:84921b10aeb2dd453247fd10de22907984eaf80901b578a5cf0bb1e279a587cb"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710c62b6e35a9a766b99b15cdc56d5aeda0914edae8bb467e9c355f75d14ee95"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c379cdd3efc0658e652a14112d51a7668f6bfca7445c5a10dee7eabecabba19d"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea9d3ca80bcf17edb2c08a4704259dadac196fe5e9274067e7a20511fad1743"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:41327143c5b1d715f5f98a397608f90ab9ebba606ae4e6f3389c2145410c52b1"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:565b2e82d0968c977e0b0f7cbf25fd06d78d4856289abc79694c8edcce6eb2de"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cf3539007202ebfe03923128fedfdd245db5860a36810136ad95a564a2fdffff"}, + {file = "coverage-7.5.0-cp311-cp311-win32.whl", hash = "sha256:bf0b4b8d9caa8d64df838e0f8dcf68fb570c5733b726d1494b87f3da85db3a2d"}, + {file = "coverage-7.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c6384cc90e37cfb60435bbbe0488444e54b98700f727f16f64d8bfda0b84656"}, + {file = "coverage-7.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fed7a72d54bd52f4aeb6c6e951f363903bd7d70bc1cad64dd1f087980d309ab9"}, + {file = 
"coverage-7.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cbe6581fcff7c8e262eb574244f81f5faaea539e712a058e6707a9d272fe5b64"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad97ec0da94b378e593ef532b980c15e377df9b9608c7c6da3506953182398af"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd4bacd62aa2f1a1627352fe68885d6ee694bdaebb16038b6e680f2924a9b2cc"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ba01d9ba112b55bfa4b24808ec431197bb34f09f66f7cb4fd0258ff9d3711b1"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f0bfe42523893c188e9616d853c47685e1c575fe25f737adf473d0405dcfa7eb"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a9a7ef30a1b02547c1b23fa9a5564f03c9982fc71eb2ecb7f98c96d7a0db5cf2"}, + {file = "coverage-7.5.0-cp312-cp312-win32.whl", hash = "sha256:3c2b77f295edb9fcdb6a250f83e6481c679335ca7e6e4a955e4290350f2d22a4"}, + {file = "coverage-7.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:427e1e627b0963ac02d7c8730ca6d935df10280d230508c0ba059505e9233475"}, + {file = "coverage-7.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dd88fce54abbdbf4c42fb1fea0e498973d07816f24c0e27a1ecaf91883ce69e"}, + {file = "coverage-7.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a898c11dca8f8c97b467138004a30133974aacd572818c383596f8d5b2eb04a9"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07dfdd492d645eea1bd70fb1d6febdcf47db178b0d99161d8e4eed18e7f62fe7"}, + {file = 
"coverage-7.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3d117890b6eee85887b1eed41eefe2e598ad6e40523d9f94c4c4b213258e4a4"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6afd2e84e7da40fe23ca588379f815fb6dbbb1b757c883935ed11647205111cb"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a9960dd1891b2ddf13a7fe45339cd59ecee3abb6b8326d8b932d0c5da208104f"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ced268e82af993d7801a9db2dbc1d2322e786c5dc76295d8e89473d46c6b84d4"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7c211f25777746d468d76f11719e64acb40eed410d81c26cefac641975beb88"}, + {file = "coverage-7.5.0-cp38-cp38-win32.whl", hash = "sha256:262fffc1f6c1a26125d5d573e1ec379285a3723363f3bd9c83923c9593a2ac25"}, + {file = "coverage-7.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:eed462b4541c540d63ab57b3fc69e7d8c84d5957668854ee4e408b50e92ce26a"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d0194d654e360b3e6cc9b774e83235bae6b9b2cac3be09040880bb0e8a88f4a1"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33c020d3322662e74bc507fb11488773a96894aa82a622c35a5a28673c0c26f5"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbdf2cae14a06827bec50bd58e49249452d211d9caddd8bd80e35b53cb04631"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3235d7c781232e525b0761730e052388a01548bd7f67d0067a253887c6e8df46"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e"}, + {file = 
"coverage-7.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0e206259b73af35c4ec1319fd04003776e11e859936658cb6ceffdeba0f5be"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2055c4fb9a6ff624253d432aa471a37202cd8f458c033d6d989be4499aed037b"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:075299460948cd12722a970c7eae43d25d37989da682997687b34ae6b87c0ef0"}, + {file = "coverage-7.5.0-cp39-cp39-win32.whl", hash = "sha256:280132aada3bc2f0fac939a5771db4fbb84f245cb35b94fae4994d4c1f80dae7"}, + {file = "coverage-7.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:c58536f6892559e030e6924896a44098bc1290663ea12532c78cef71d0df8493"}, + {file = "coverage-7.5.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:2b57780b51084d5223eee7b59f0d4911c31c16ee5aa12737c7a02455829ff067"}, + {file = "coverage-7.5.0.tar.gz", hash = "sha256:cf62d17310f34084c59c01e027259076479128d11e4661bb6c9acb38c5e19bb8"}, ] [package.dependencies] @@ -388,13 +388,13 @@ files = [ [[package]] name = "exceptiongroup" -version = "1.2.0" +version = "1.2.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, - {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, ] [package.extras] @@ -463,53 +463,53 @@ test = ["fiona[s3]", "pytest (>=7)", "pytest-cov", "pytz"] [[package]] name = "fonttools" -version = "4.50.0" +version = "4.51.0" description = "Tools to manipulate font files" optional = false python-versions = ">=3.8" files = [ - {file = 
"fonttools-4.50.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effd303fb422f8ce06543a36ca69148471144c534cc25f30e5be752bc4f46736"}, - {file = "fonttools-4.50.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7913992ab836f621d06aabac118fc258b9947a775a607e1a737eb3a91c360335"}, - {file = "fonttools-4.50.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e0a1c5bd2f63da4043b63888534b52c5a1fd7ae187c8ffc64cbb7ae475b9dab"}, - {file = "fonttools-4.50.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d40fc98540fa5360e7ecf2c56ddf3c6e7dd04929543618fd7b5cc76e66390562"}, - {file = "fonttools-4.50.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fff65fbb7afe137bac3113827855e0204482727bddd00a806034ab0d3951d0d"}, - {file = "fonttools-4.50.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1aeae3dd2ee719074a9372c89ad94f7c581903306d76befdaca2a559f802472"}, - {file = "fonttools-4.50.0-cp310-cp310-win32.whl", hash = "sha256:e9623afa319405da33b43c85cceb0585a6f5d3a1d7c604daf4f7e1dd55c03d1f"}, - {file = "fonttools-4.50.0-cp310-cp310-win_amd64.whl", hash = "sha256:778c5f43e7e654ef7fe0605e80894930bc3a7772e2f496238e57218610140f54"}, - {file = "fonttools-4.50.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3dfb102e7f63b78c832e4539969167ffcc0375b013080e6472350965a5fe8048"}, - {file = "fonttools-4.50.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e58fe34cb379ba3d01d5d319d67dd3ce7ca9a47ad044ea2b22635cd2d1247fc"}, - {file = "fonttools-4.50.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c673ab40d15a442a4e6eb09bf007c1dda47c84ac1e2eecbdf359adacb799c24"}, - {file = "fonttools-4.50.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b3ac35cdcd1a4c90c23a5200212c1bb74fa05833cc7c14291d7043a52ca2aaa"}, - {file = "fonttools-4.50.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:8844e7a2c5f7ecf977e82eb6b3014f025c8b454e046d941ece05b768be5847ae"}, - {file = "fonttools-4.50.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f849bd3c5c2249b49c98eca5aaebb920d2bfd92b3c69e84ca9bddf133e9f83f0"}, - {file = "fonttools-4.50.0-cp311-cp311-win32.whl", hash = "sha256:39293ff231b36b035575e81c14626dfc14407a20de5262f9596c2cbb199c3625"}, - {file = "fonttools-4.50.0-cp311-cp311-win_amd64.whl", hash = "sha256:c33d5023523b44d3481624f840c8646656a1def7630ca562f222eb3ead16c438"}, - {file = "fonttools-4.50.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b4a886a6dbe60100ba1cd24de962f8cd18139bd32808da80de1fa9f9f27bf1dc"}, - {file = "fonttools-4.50.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b2ca1837bfbe5eafa11313dbc7edada79052709a1fffa10cea691210af4aa1fa"}, - {file = "fonttools-4.50.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0493dd97ac8977e48ffc1476b932b37c847cbb87fd68673dee5182004906828"}, - {file = "fonttools-4.50.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77844e2f1b0889120b6c222fc49b2b75c3d88b930615e98893b899b9352a27ea"}, - {file = "fonttools-4.50.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3566bfb8c55ed9100afe1ba6f0f12265cd63a1387b9661eb6031a1578a28bad1"}, - {file = "fonttools-4.50.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:35e10ddbc129cf61775d58a14f2d44121178d89874d32cae1eac722e687d9019"}, - {file = "fonttools-4.50.0-cp312-cp312-win32.whl", hash = "sha256:cc8140baf9fa8f9b903f2b393a6c413a220fa990264b215bf48484f3d0bf8710"}, - {file = "fonttools-4.50.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ccc85fd96373ab73c59833b824d7a73846670a0cb1f3afbaee2b2c426a8f931"}, - {file = "fonttools-4.50.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e270a406219af37581d96c810172001ec536e29e5593aa40d4c01cca3e145aa6"}, - {file = "fonttools-4.50.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:ac2463de667233372e9e1c7e9de3d914b708437ef52a3199fdbf5a60184f190c"}, - {file = "fonttools-4.50.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47abd6669195abe87c22750dbcd366dc3a0648f1b7c93c2baa97429c4dc1506e"}, - {file = "fonttools-4.50.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:074841375e2e3d559aecc86e1224caf78e8b8417bb391e7d2506412538f21adc"}, - {file = "fonttools-4.50.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0743fd2191ad7ab43d78cd747215b12033ddee24fa1e088605a3efe80d6984de"}, - {file = "fonttools-4.50.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3d7080cce7be5ed65bee3496f09f79a82865a514863197ff4d4d177389e981b0"}, - {file = "fonttools-4.50.0-cp38-cp38-win32.whl", hash = "sha256:a467ba4e2eadc1d5cc1a11d355abb945f680473fbe30d15617e104c81f483045"}, - {file = "fonttools-4.50.0-cp38-cp38-win_amd64.whl", hash = "sha256:f77e048f805e00870659d6318fd89ef28ca4ee16a22b4c5e1905b735495fc422"}, - {file = "fonttools-4.50.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6245eafd553c4e9a0708e93be51392bd2288c773523892fbd616d33fd2fda59"}, - {file = "fonttools-4.50.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a4062cc7e8de26f1603323ef3ae2171c9d29c8a9f5e067d555a2813cd5c7a7e0"}, - {file = "fonttools-4.50.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34692850dfd64ba06af61e5791a441f664cb7d21e7b544e8f385718430e8f8e4"}, - {file = "fonttools-4.50.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:678dd95f26a67e02c50dcb5bf250f95231d455642afbc65a3b0bcdacd4e4dd38"}, - {file = "fonttools-4.50.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4f2ce7b0b295fe64ac0a85aef46a0f2614995774bd7bc643b85679c0283287f9"}, - {file = "fonttools-4.50.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d346f4dc2221bfb7ab652d1e37d327578434ce559baf7113b0f55768437fe6a0"}, - {file = "fonttools-4.50.0-cp39-cp39-win32.whl", hash = 
"sha256:a51eeaf52ba3afd70bf489be20e52fdfafe6c03d652b02477c6ce23c995222f4"}, - {file = "fonttools-4.50.0-cp39-cp39-win_amd64.whl", hash = "sha256:8639be40d583e5d9da67795aa3eeeda0488fb577a1d42ae11a5036f18fb16d93"}, - {file = "fonttools-4.50.0-py3-none-any.whl", hash = "sha256:48fa36da06247aa8282766cfd63efff1bb24e55f020f29a335939ed3844d20d3"}, - {file = "fonttools-4.50.0.tar.gz", hash = "sha256:fa5cf61058c7dbb104c2ac4e782bf1b2016a8cf2f69de6e4dd6a865d2c969bb5"}, + {file = "fonttools-4.51.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:84d7751f4468dd8cdd03ddada18b8b0857a5beec80bce9f435742abc9a851a74"}, + {file = "fonttools-4.51.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8b4850fa2ef2cfbc1d1f689bc159ef0f45d8d83298c1425838095bf53ef46308"}, + {file = "fonttools-4.51.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5b48a1121117047d82695d276c2af2ee3a24ffe0f502ed581acc2673ecf1037"}, + {file = "fonttools-4.51.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:180194c7fe60c989bb627d7ed5011f2bef1c4d36ecf3ec64daec8302f1ae0716"}, + {file = "fonttools-4.51.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:96a48e137c36be55e68845fc4284533bda2980f8d6f835e26bca79d7e2006438"}, + {file = "fonttools-4.51.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:806e7912c32a657fa39d2d6eb1d3012d35f841387c8fc6cf349ed70b7c340039"}, + {file = "fonttools-4.51.0-cp310-cp310-win32.whl", hash = "sha256:32b17504696f605e9e960647c5f64b35704782a502cc26a37b800b4d69ff3c77"}, + {file = "fonttools-4.51.0-cp310-cp310-win_amd64.whl", hash = "sha256:c7e91abdfae1b5c9e3a543f48ce96013f9a08c6c9668f1e6be0beabf0a569c1b"}, + {file = "fonttools-4.51.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a8feca65bab31479d795b0d16c9a9852902e3a3c0630678efb0b2b7941ea9c74"}, + {file = "fonttools-4.51.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ac27f436e8af7779f0bb4d5425aa3535270494d3bc5459ed27de3f03151e4c2"}, + {file = 
"fonttools-4.51.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e19bd9e9964a09cd2433a4b100ca7f34e34731e0758e13ba9a1ed6e5468cc0f"}, + {file = "fonttools-4.51.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2b92381f37b39ba2fc98c3a45a9d6383bfc9916a87d66ccb6553f7bdd129097"}, + {file = "fonttools-4.51.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5f6bc991d1610f5c3bbe997b0233cbc234b8e82fa99fc0b2932dc1ca5e5afec0"}, + {file = "fonttools-4.51.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9696fe9f3f0c32e9a321d5268208a7cc9205a52f99b89479d1b035ed54c923f1"}, + {file = "fonttools-4.51.0-cp311-cp311-win32.whl", hash = "sha256:3bee3f3bd9fa1d5ee616ccfd13b27ca605c2b4270e45715bd2883e9504735034"}, + {file = "fonttools-4.51.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f08c901d3866a8905363619e3741c33f0a83a680d92a9f0e575985c2634fcc1"}, + {file = "fonttools-4.51.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4060acc2bfa2d8e98117828a238889f13b6f69d59f4f2d5857eece5277b829ba"}, + {file = "fonttools-4.51.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1250e818b5f8a679ad79660855528120a8f0288f8f30ec88b83db51515411fcc"}, + {file = "fonttools-4.51.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76f1777d8b3386479ffb4a282e74318e730014d86ce60f016908d9801af9ca2a"}, + {file = "fonttools-4.51.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b5ad456813d93b9c4b7ee55302208db2b45324315129d85275c01f5cb7e61a2"}, + {file = "fonttools-4.51.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:68b3fb7775a923be73e739f92f7e8a72725fd333eab24834041365d2278c3671"}, + {file = "fonttools-4.51.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8e2f1a4499e3b5ee82c19b5ee57f0294673125c65b0a1ff3764ea1f9db2f9ef5"}, + {file = "fonttools-4.51.0-cp312-cp312-win32.whl", hash = 
"sha256:278e50f6b003c6aed19bae2242b364e575bcb16304b53f2b64f6551b9c000e15"}, + {file = "fonttools-4.51.0-cp312-cp312-win_amd64.whl", hash = "sha256:b3c61423f22165541b9403ee39874dcae84cd57a9078b82e1dce8cb06b07fa2e"}, + {file = "fonttools-4.51.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1621ee57da887c17312acc4b0e7ac30d3a4fb0fec6174b2e3754a74c26bbed1e"}, + {file = "fonttools-4.51.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d9298be7a05bb4801f558522adbe2feea1b0b103d5294ebf24a92dd49b78e5"}, + {file = "fonttools-4.51.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee1af4be1c5afe4c96ca23badd368d8dc75f611887fb0c0dac9f71ee5d6f110e"}, + {file = "fonttools-4.51.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c18b49adc721a7d0b8dfe7c3130c89b8704baf599fb396396d07d4aa69b824a1"}, + {file = "fonttools-4.51.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:de7c29bdbdd35811f14493ffd2534b88f0ce1b9065316433b22d63ca1cd21f14"}, + {file = "fonttools-4.51.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cadf4e12a608ef1d13e039864f484c8a968840afa0258b0b843a0556497ea9ed"}, + {file = "fonttools-4.51.0-cp38-cp38-win32.whl", hash = "sha256:aefa011207ed36cd280babfaa8510b8176f1a77261833e895a9d96e57e44802f"}, + {file = "fonttools-4.51.0-cp38-cp38-win_amd64.whl", hash = "sha256:865a58b6e60b0938874af0968cd0553bcd88e0b2cb6e588727117bd099eef836"}, + {file = "fonttools-4.51.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:60a3409c9112aec02d5fb546f557bca6efa773dcb32ac147c6baf5f742e6258b"}, + {file = "fonttools-4.51.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7e89853d8bea103c8e3514b9f9dc86b5b4120afb4583b57eb10dfa5afbe0936"}, + {file = "fonttools-4.51.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56fc244f2585d6c00b9bcc59e6593e646cf095a96fe68d62cd4da53dd1287b55"}, + {file = "fonttools-4.51.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0d145976194a5242fdd22df18a1b451481a88071feadf251221af110ca8f00ce"}, + {file = "fonttools-4.51.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5b8cab0c137ca229433570151b5c1fc6af212680b58b15abd797dcdd9dd5051"}, + {file = "fonttools-4.51.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:54dcf21a2f2d06ded676e3c3f9f74b2bafded3a8ff12f0983160b13e9f2fb4a7"}, + {file = "fonttools-4.51.0-cp39-cp39-win32.whl", hash = "sha256:0118ef998a0699a96c7b28457f15546815015a2710a1b23a7bf6c1be60c01636"}, + {file = "fonttools-4.51.0-cp39-cp39-win_amd64.whl", hash = "sha256:599bdb75e220241cedc6faebfafedd7670335d2e29620d207dd0378a4e9ccc5a"}, + {file = "fonttools-4.51.0-py3-none-any.whl", hash = "sha256:15c94eeef6b095831067f72c825eb0e2d48bb4cea0647c1b05c981ecba2bf39f"}, + {file = "fonttools-4.51.0.tar.gz", hash = "sha256:dc0673361331566d7a663d7ce0f6fdcbfbdc1f59c6e3ed1165ad7202ca183c68"}, ] [package.extras] @@ -528,17 +528,18 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] [[package]] name = "geopandas" -version = "0.14.3" +version = "0.14.4" description = "Geographic pandas extensions" optional = false python-versions = ">=3.9" files = [ - {file = "geopandas-0.14.3-py3-none-any.whl", hash = "sha256:41b31ad39e21bc9e8c4254f78f8dc4ce3d33d144e22e630a00bb336c83160204"}, - {file = "geopandas-0.14.3.tar.gz", hash = "sha256:748af035d4a068a4ae00cab384acb61d387685c833b0022e0729aa45216b23ac"}, + {file = "geopandas-0.14.4-py3-none-any.whl", hash = "sha256:3bb6473cb59d51e1a7fe2dbc24a1a063fb0ebdeddf3ce08ddbf8c7ddc99689aa"}, + {file = "geopandas-0.14.4.tar.gz", hash = "sha256:56765be9d58e2c743078085db3bd07dc6be7719f0dbe1dfdc1d705cb80be7c25"}, ] [package.dependencies] fiona = ">=1.8.21" +numpy = ">=1.22" packaging = "*" pandas = ">=1.4.0" pyproj = ">=3.3.0" @@ -590,13 +591,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.23.0" +version = "8.24.0" description = "IPython: Productive 
Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.23.0-py3-none-any.whl", hash = "sha256:07232af52a5ba146dc3372c7bf52a0f890a23edf38d77caef8d53f9cdc2584c1"}, - {file = "ipython-8.23.0.tar.gz", hash = "sha256:7468edaf4f6de3e1b912e57f66c241e6fd3c7099f2ec2136e239e142e800274d"}, + {file = "ipython-8.24.0-py3-none-any.whl", hash = "sha256:d7bf2f6c4314984e3e02393213bab8703cf163ede39672ce5918c51fe253a2a3"}, + {file = "ipython-8.24.0.tar.gz", hash = "sha256:010db3f8a728a578bb641fdd06c063b9fb8e96a9464c63aec6310fbcb5e80501"}, ] [package.dependencies] @@ -610,7 +611,7 @@ prompt-toolkit = ">=3.0.41,<3.1.0" pygments = ">=2.4.0" stack-data = "*" traitlets = ">=5.13.0" -typing-extensions = {version = "*", markers = "python_version < \"3.12\""} +typing-extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} [package.extras] all = ["ipython[black,doc,kernel,matplotlib,nbconvert,nbformat,notebook,parallel,qtconsole]", "ipython[test,test-extra]"] @@ -623,7 +624,7 @@ nbformat = ["nbformat"] notebook = ["ipywidgets", "notebook"] parallel = ["ipyparallel"] qtconsole = ["qtconsole"] -test = ["pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "testpath"] +test = ["pickleshare", "pytest", "pytest-asyncio (<0.22)", "testpath"] test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"] [[package]] @@ -647,13 +648,13 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] [[package]] name = "joblib" -version = "1.3.2" +version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, - {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, + {file = "joblib-1.4.2-py3-none-any.whl", 
hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] [[package]] @@ -813,39 +814,39 @@ files = [ [[package]] name = "matplotlib" -version = "3.8.3" +version = "3.8.4" description = "Python plotting package" optional = false python-versions = ">=3.9" files = [ - {file = "matplotlib-3.8.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cf60138ccc8004f117ab2a2bad513cc4d122e55864b4fe7adf4db20ca68a078f"}, - {file = "matplotlib-3.8.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f557156f7116be3340cdeef7f128fa99b0d5d287d5f41a16e169819dcf22357"}, - {file = "matplotlib-3.8.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f386cf162b059809ecfac3bcc491a9ea17da69fa35c8ded8ad154cd4b933d5ec"}, - {file = "matplotlib-3.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3c5f96f57b0369c288bf6f9b5274ba45787f7e0589a34d24bdbaf6d3344632f"}, - {file = "matplotlib-3.8.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:83e0f72e2c116ca7e571c57aa29b0fe697d4c6425c4e87c6e994159e0c008635"}, - {file = "matplotlib-3.8.3-cp310-cp310-win_amd64.whl", hash = "sha256:1c5c8290074ba31a41db1dc332dc2b62def469ff33766cbe325d32a3ee291aea"}, - {file = "matplotlib-3.8.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5184e07c7e1d6d1481862ee361905b7059f7fe065fc837f7c3dc11eeb3f2f900"}, - {file = "matplotlib-3.8.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7e7e0993d0758933b1a241a432b42c2db22dfa37d4108342ab4afb9557cbe3e"}, - {file = "matplotlib-3.8.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04b36ad07eac9740fc76c2aa16edf94e50b297d6eb4c081e3add863de4bb19a7"}, - {file = "matplotlib-3.8.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c42dae72a62f14982f1474f7e5c9959fc4bc70c9de11cc5244c6e766200ba65"}, - {file = 
"matplotlib-3.8.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bf5932eee0d428192c40b7eac1399d608f5d995f975cdb9d1e6b48539a5ad8d0"}, - {file = "matplotlib-3.8.3-cp311-cp311-win_amd64.whl", hash = "sha256:40321634e3a05ed02abf7c7b47a50be50b53ef3eaa3a573847431a545585b407"}, - {file = "matplotlib-3.8.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:09074f8057917d17ab52c242fdf4916f30e99959c1908958b1fc6032e2d0f6d4"}, - {file = "matplotlib-3.8.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5745f6d0fb5acfabbb2790318db03809a253096e98c91b9a31969df28ee604aa"}, - {file = "matplotlib-3.8.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b97653d869a71721b639714b42d87cda4cfee0ee74b47c569e4874c7590c55c5"}, - {file = "matplotlib-3.8.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:242489efdb75b690c9c2e70bb5c6550727058c8a614e4c7716f363c27e10bba1"}, - {file = "matplotlib-3.8.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:83c0653c64b73926730bd9ea14aa0f50f202ba187c307a881673bad4985967b7"}, - {file = "matplotlib-3.8.3-cp312-cp312-win_amd64.whl", hash = "sha256:ef6c1025a570354297d6c15f7d0f296d95f88bd3850066b7f1e7b4f2f4c13a39"}, - {file = "matplotlib-3.8.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c4af3f7317f8a1009bbb2d0bf23dfaba859eb7dd4ccbd604eba146dccaaaf0a4"}, - {file = "matplotlib-3.8.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4c6e00a65d017d26009bac6808f637b75ceade3e1ff91a138576f6b3065eeeba"}, - {file = "matplotlib-3.8.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7b49ab49a3bea17802df6872f8d44f664ba8f9be0632a60c99b20b6db2165b7"}, - {file = "matplotlib-3.8.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6728dde0a3997396b053602dbd907a9bd64ec7d5cf99e728b404083698d3ca01"}, - {file = "matplotlib-3.8.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:813925d08fb86aba139f2d31864928d67511f64e5945ca909ad5bc09a96189bb"}, - {file 
= "matplotlib-3.8.3-cp39-cp39-win_amd64.whl", hash = "sha256:cd3a0c2be76f4e7be03d34a14d49ded6acf22ef61f88da600a18a5cd8b3c5f3c"}, - {file = "matplotlib-3.8.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fa93695d5c08544f4a0dfd0965f378e7afc410d8672816aff1e81be1f45dbf2e"}, - {file = "matplotlib-3.8.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9764df0e8778f06414b9d281a75235c1e85071f64bb5d71564b97c1306a2afc"}, - {file = "matplotlib-3.8.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:5e431a09e6fab4012b01fc155db0ce6dccacdbabe8198197f523a4ef4805eb26"}, - {file = "matplotlib-3.8.3.tar.gz", hash = "sha256:7b416239e9ae38be54b028abbf9048aff5054a9aba5416bef0bd17f9162ce161"}, + {file = "matplotlib-3.8.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:abc9d838f93583650c35eca41cfcec65b2e7cb50fd486da6f0c49b5e1ed23014"}, + {file = "matplotlib-3.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f65c9f002d281a6e904976007b2d46a1ee2bcea3a68a8c12dda24709ddc9106"}, + {file = "matplotlib-3.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce1edd9f5383b504dbc26eeea404ed0a00656c526638129028b758fd43fc5f10"}, + {file = "matplotlib-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd79298550cba13a43c340581a3ec9c707bd895a6a061a78fa2524660482fc0"}, + {file = "matplotlib-3.8.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:90df07db7b599fe7035d2f74ab7e438b656528c68ba6bb59b7dc46af39ee48ef"}, + {file = "matplotlib-3.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:ac24233e8f2939ac4fd2919eed1e9c0871eac8057666070e94cbf0b33dd9c338"}, + {file = "matplotlib-3.8.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:72f9322712e4562e792b2961971891b9fbbb0e525011e09ea0d1f416c4645661"}, + {file = "matplotlib-3.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:232ce322bfd020a434caaffbd9a95333f7c2491e59cfc014041d95e38ab90d1c"}, + {file = 
"matplotlib-3.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6addbd5b488aedb7f9bc19f91cd87ea476206f45d7116fcfe3d31416702a82fa"}, + {file = "matplotlib-3.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc4ccdc64e3039fc303defd119658148f2349239871db72cd74e2eeaa9b80b71"}, + {file = "matplotlib-3.8.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b7a2a253d3b36d90c8993b4620183b55665a429da8357a4f621e78cd48b2b30b"}, + {file = "matplotlib-3.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:8080d5081a86e690d7688ffa542532e87f224c38a6ed71f8fbed34dd1d9fedae"}, + {file = "matplotlib-3.8.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6485ac1f2e84676cff22e693eaa4fbed50ef5dc37173ce1f023daef4687df616"}, + {file = "matplotlib-3.8.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c89ee9314ef48c72fe92ce55c4e95f2f39d70208f9f1d9db4e64079420d8d732"}, + {file = "matplotlib-3.8.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50bac6e4d77e4262c4340d7a985c30912054745ec99756ce213bfbc3cb3808eb"}, + {file = "matplotlib-3.8.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f51c4c869d4b60d769f7b4406eec39596648d9d70246428745a681c327a8ad30"}, + {file = "matplotlib-3.8.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b12ba985837e4899b762b81f5b2845bd1a28f4fdd1a126d9ace64e9c4eb2fb25"}, + {file = "matplotlib-3.8.4-cp312-cp312-win_amd64.whl", hash = "sha256:7a6769f58ce51791b4cb8b4d7642489df347697cd3e23d88266aaaee93b41d9a"}, + {file = "matplotlib-3.8.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:843cbde2f0946dadd8c5c11c6d91847abd18ec76859dc319362a0964493f0ba6"}, + {file = "matplotlib-3.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c13f041a7178f9780fb61cc3a2b10423d5e125480e4be51beaf62b172413b67"}, + {file = "matplotlib-3.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fb44f53af0a62dc80bba4443d9b27f2fde6acfdac281d95bc872dc148a6509cc"}, + {file = "matplotlib-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:606e3b90897554c989b1e38a258c626d46c873523de432b1462f295db13de6f9"}, + {file = "matplotlib-3.8.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9bb0189011785ea794ee827b68777db3ca3f93f3e339ea4d920315a0e5a78d54"}, + {file = "matplotlib-3.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:6209e5c9aaccc056e63b547a8152661324404dd92340a6e479b3a7f24b42a5d0"}, + {file = "matplotlib-3.8.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c7064120a59ce6f64103c9cefba8ffe6fba87f2c61d67c401186423c9a20fd35"}, + {file = "matplotlib-3.8.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0e47eda4eb2614300fc7bb4657fced3e83d6334d03da2173b09e447418d499f"}, + {file = "matplotlib-3.8.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:493e9f6aa5819156b58fce42b296ea31969f2aab71c5b680b4ea7a3cb5c07d94"}, + {file = "matplotlib-3.8.4.tar.gz", hash = "sha256:8aac397d5e9ec158960e31c381c5ffc52ddd52bd9a47717e2a694038167dffea"}, ] [package.dependencies] @@ -853,7 +854,7 @@ contourpy = ">=1.0.1" cycler = ">=0.10" fonttools = ">=4.22.0" kiwisolver = ">=1.3.1" -numpy = ">=1.21,<2" +numpy = ">=1.21" packaging = ">=20.0" pillow = ">=8" pyparsing = ">=2.3.1" @@ -861,13 +862,13 @@ python-dateutil = ">=2.7" [[package]] name = "matplotlib-inline" -version = "0.1.6" +version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" optional = false -python-versions = ">=3.5" +python-versions = ">=3.8" files = [ - {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"}, - {file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"}, + {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = 
"sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, + {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, ] [package.dependencies] @@ -931,58 +932,58 @@ files = [ [[package]] name = "packaging" -version = "23.2" +version = "24.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, ] [[package]] name = "pandas" -version = "2.2.0" +version = "2.2.2" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" files = [ - {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"}, - {file = "pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:736da9ad4033aeab51d067fc3bd69a0ba36f5a60f66a527b3d72e2030e63280a"}, - {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e0b4fc3ddceb56ec8a287313bc22abe17ab0eb184069f08fc6a9352a769b18"}, - {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20404d2adefe92aed3b38da41d0847a143a09be982a31b85bc7dd565bdba0f4e"}, - {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ea3ee3f125032bfcade3a4cf85131ed064b4f8dd23e5ce6fa16473e48ebcaf5"}, - {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:f9670b3ac00a387620489dfc1bca66db47a787f4e55911f1293063a78b108df1"}, - {file = "pandas-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a946f210383c7e6d16312d30b238fd508d80d927014f3b33fb5b15c2f895430"}, - {file = "pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a1b438fa26b208005c997e78672f1aa8138f67002e833312e6230f3e57fa87d5"}, - {file = "pandas-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ce2fbc8d9bf303ce54a476116165220a1fedf15985b09656b4b4275300e920b"}, - {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2707514a7bec41a4ab81f2ccce8b382961a29fbe9492eab1305bb075b2b1ff4f"}, - {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85793cbdc2d5bc32620dc8ffa715423f0c680dacacf55056ba13454a5be5de88"}, - {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cfd6c2491dc821b10c716ad6776e7ab311f7df5d16038d0b7458bc0b67dc10f3"}, - {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a146b9dcacc3123aa2b399df1a284de5f46287a4ab4fbfc237eac98a92ebcb71"}, - {file = "pandas-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbc1b53c0e1fdf16388c33c3cca160f798d38aea2978004dd3f4d3dec56454c9"}, - {file = "pandas-2.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a41d06f308a024981dcaa6c41f2f2be46a6b186b902c94c2674e8cb5c42985bc"}, - {file = "pandas-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:159205c99d7a5ce89ecfc37cb08ed179de7783737cea403b295b5eda8e9c56d1"}, - {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1e1f3861ea9132b32f2133788f3b14911b68102d562715d71bd0013bc45440"}, - {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:761cb99b42a69005dec2b08854fb1d4888fdf7b05db23a8c5a099e4b886a2106"}, - {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:a20628faaf444da122b2a64b1e5360cde100ee6283ae8effa0d8745153809a2e"}, - {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f5be5d03ea2073627e7111f61b9f1f0d9625dc3c4d8dda72cc827b0c58a1d042"}, - {file = "pandas-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a626795722d893ed6aacb64d2401d017ddc8a2341b49e0384ab9bf7112bdec30"}, - {file = "pandas-2.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9f66419d4a41132eb7e9a73dcec9486cf5019f52d90dd35547af11bc58f8637d"}, - {file = "pandas-2.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57abcaeda83fb80d447f28ab0cc7b32b13978f6f733875ebd1ed14f8fbc0f4ab"}, - {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e60f1f7dba3c2d5ca159e18c46a34e7ca7247a73b5dd1a22b6d59707ed6b899a"}, - {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a"}, - {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:52826b5f4ed658fa2b729264d63f6732b8b29949c7fd234510d57c61dbeadfcd"}, - {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bde2bc699dbd80d7bc7f9cab1e23a95c4375de615860ca089f34e7c64f4a8de7"}, - {file = "pandas-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3de918a754bbf2da2381e8a3dcc45eede8cd7775b047b923f9006d5f876802ae"}, - {file = "pandas-2.2.0.tar.gz", hash = "sha256:30b83f7c3eb217fb4d1b494a57a2fda5444f17834f5df2de6b2ffff68dc3c8e2"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = 
"pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = 
"pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, ] [package.dependencies] numpy = [ - {version = ">=1.22.4,<2", markers = "python_version 
< \"3.11\""}, - {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -1007,6 +1008,7 @@ parquet = ["pyarrow (>=10.0.1)"] performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] plot = ["matplotlib (>=3.6.3)"] postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] spss = ["pyreadstat (>=1.2.0)"] sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] @@ -1014,18 +1016,18 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "parso" -version = "0.8.3" +version = "0.8.4" description = "A Python Parser" optional = false python-versions = ">=3.6" files = [ - {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, - {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"}, + {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, + {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, ] [package.extras] -qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] -testing = ["docopt", "pytest (<6.0.0)"] +qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] +testing = ["docopt", "pytest"] [[package]] name = "pexpect" @@ -1129,28 +1131,29 @@ xmp = ["defusedxml"] [[package]] name = "platformdirs" -version = "4.2.0" -description = "A small Python package for determining 
appropriate platform-specific dirs, e.g. a \"user data dir\"." +version = "4.2.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, - {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, + {file = "platformdirs-4.2.1-py3-none-any.whl", hash = "sha256:17d5a1161b3fd67b390023cb2d3b026bbd40abde6fdb052dfbd3a29c3ba22ee1"}, + {file = "platformdirs-4.2.1.tar.gz", hash = "sha256:031cd18d4ec63ec53e82dceaac0417d218a6863f7745dfcc9efe7793b7039bdf"}, ] [package.extras] docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] [[package]] name = "pluggy" -version = "1.4.0" +version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, - {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, ] [package.extras] @@ -1159,25 +1162,27 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "polars" -version = "0.20.8" +version = "0.20.23" description = "Blazingly fast DataFrame library" optional = false python-versions = ">=3.8" files = [ - {file = 
"polars-0.20.8-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:73f1d369aeddda5f11411b6497f697f2471bbe6ae55fd936677a10a40995c83c"}, - {file = "polars-0.20.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:dc3a446fe606095b3ad6df3cf3dddd8ad54be7745f255fedb29f8bdf71a60760"}, - {file = "polars-0.20.8-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3d58ebc7a24d26930535d06b8772e125038a87a6abab4c5dfd87ea19bba61f3"}, - {file = "polars-0.20.8-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:5b733816ac61156c12bd0edd6d7c1a5e63859830ce0e425b6450b335024f0cd5"}, - {file = "polars-0.20.8-cp38-abi3-win_amd64.whl", hash = "sha256:2300f48ff7120eefe2cac2113990d0b0b5beedad93266b9fedfc8df133e7b13b"}, - {file = "polars-0.20.8.tar.gz", hash = "sha256:a34f6ce1c5469872b291aaf90467e632e81f92dec6c2e18136bc40cd92877411"}, + {file = "polars-0.20.23-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9b1001a29e785126f0e189786223c45bf9c7696ed3d221a61dd629ff5e8229d3"}, + {file = "polars-0.20.23-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:7de724f9b7f94c76008023b1ef9319e7dccada97e98d48d548be487be8dc2ea6"}, + {file = "polars-0.20.23-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f109512a456f9f8bdc20e5b19e5794471d4a1a507f99daf1afe1b41eb3227c41"}, + {file = "polars-0.20.23-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:bfd10ffabafecba3bb836f9e267405abc8259da5fb8d5e74096d46eea802a295"}, + {file = "polars-0.20.23-cp38-abi3-win_amd64.whl", hash = "sha256:de69adcfe4a92821f28c0223b801e56a36682a7aac32df8e860e6df7678f4c8a"}, + {file = "polars-0.20.23.tar.gz", hash = "sha256:4503c446c7771d5b52d5bff4f2dbf2e999a87a1cc3c89931db255cff43218436"}, ] [package.extras] -adbc = ["adbc_driver_sqlite"] -all = ["polars[adbc,cloudpickle,connectorx,deltalake,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] +adbc = ["adbc-driver-manager", "adbc-driver-sqlite"] +all = 
["polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] +async = ["nest-asyncio"] cloudpickle = ["cloudpickle"] connectorx = ["connectorx (>=0.3.2)"] -deltalake = ["deltalake (>=0.14.0)"] +deltalake = ["deltalake (>=0.15.0)"] +fastexcel = ["fastexcel (>=0.9)"] fsspec = ["fsspec"] gevent = ["gevent"] matplotlib = ["matplotlib"] @@ -1190,7 +1195,7 @@ pydantic = ["pydantic"] pyiceberg = ["pyiceberg (>=0.5.0)"] pyxlsb = ["pyxlsb (>=1.0)"] sqlalchemy = ["pandas", "sqlalchemy"] -timezone = ["backports.zoneinfo", "tzdata"] +timezone = ["backports-zoneinfo", "tzdata"] xlsx2csv = ["xlsx2csv (>=0.8.0)"] xlsxwriter = ["xlsxwriter"] @@ -1210,22 +1215,22 @@ wcwidth = "*" [[package]] name = "protobuf" -version = "4.25.2" +version = "4.25.3" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"}, - {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"}, - {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"}, - {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"}, - {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"}, - {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"}, - {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"}, - {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = 
"sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"}, - {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"}, - {file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"}, - {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"}, + {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"}, + {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"}, + {file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"}, + {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"}, + {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"}, + {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"}, + {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"}, + {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"}, + {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"}, ] [[package]] @@ -1283,44 +1288,51 @@ 
tests = ["pytest"] [[package]] name = "pyarrow" -version = "13.0.0" +version = "15.0.2" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" files = [ - {file = "pyarrow-13.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:1afcc2c33f31f6fb25c92d50a86b7a9f076d38acbcb6f9e74349636109550148"}, - {file = "pyarrow-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70fa38cdc66b2fc1349a082987f2b499d51d072faaa6b600f71931150de2e0e3"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd57b13a6466822498238877892a9b287b0a58c2e81e4bdb0b596dbb151cbb73"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ce69f7bf01de2e2764e14df45b8404fc6f1a5ed9871e8e08a12169f87b7a26"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:588f0d2da6cf1b1680974d63be09a6530fd1bd825dc87f76e162404779a157dc"}, - {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6241afd72b628787b4abea39e238e3ff9f34165273fad306c7acf780dd850956"}, - {file = "pyarrow-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:fda7857e35993673fcda603c07d43889fca60a5b254052a462653f8656c64f44"}, - {file = "pyarrow-13.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:aac0ae0146a9bfa5e12d87dda89d9ef7c57a96210b899459fc2f785303dcbb67"}, - {file = "pyarrow-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7759994217c86c161c6a8060509cfdf782b952163569606bb373828afdd82e8"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868a073fd0ff6468ae7d869b5fc1f54de5c4255b37f44fb890385eb68b68f95d"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51be67e29f3cfcde263a113c28e96aa04362ed8229cb7c6e5f5c719003659d33"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = 
"sha256:d1b4e7176443d12610874bb84d0060bf080f000ea9ed7c84b2801df851320295"}, - {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:69b6f9a089d116a82c3ed819eea8fe67dae6105f0d81eaf0fdd5e60d0c6e0944"}, - {file = "pyarrow-13.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ab1268db81aeb241200e321e220e7cd769762f386f92f61b898352dd27e402ce"}, - {file = "pyarrow-13.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ee7490f0f3f16a6c38f8c680949551053c8194e68de5046e6c288e396dccee80"}, - {file = "pyarrow-13.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3ad79455c197a36eefbd90ad4aa832bece7f830a64396c15c61a0985e337287"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68fcd2dc1b7d9310b29a15949cdd0cb9bc34b6de767aff979ebf546020bf0ba0"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc6fd330fd574c51d10638e63c0d00ab456498fc804c9d01f2a61b9264f2c5b2"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e66442e084979a97bb66939e18f7b8709e4ac5f887e636aba29486ffbf373763"}, - {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:0f6eff839a9e40e9c5610d3ff8c5bdd2f10303408312caf4c8003285d0b49565"}, - {file = "pyarrow-13.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b30a27f1cddf5c6efcb67e598d7823a1e253d743d92ac32ec1eb4b6a1417867"}, - {file = "pyarrow-13.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:09552dad5cf3de2dc0aba1c7c4b470754c69bd821f5faafc3d774bedc3b04bb7"}, - {file = "pyarrow-13.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3896ae6c205d73ad192d2fc1489cd0edfab9f12867c85b4c277af4d37383c18c"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6647444b21cb5e68b593b970b2a9a07748dd74ea457c7dadaa15fd469c48ada1"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:47663efc9c395e31d09c6aacfa860f4473815ad6804311c5433f7085415d62a7"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b9ba6b6d34bd2563345488cf444510588ea42ad5613df3b3509f48eb80250afd"}, - {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:d00d374a5625beeb448a7fa23060df79adb596074beb3ddc1838adb647b6ef09"}, - {file = "pyarrow-13.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:c51afd87c35c8331b56f796eff954b9c7f8d4b7fef5903daf4e05fcf017d23a8"}, - {file = "pyarrow-13.0.0.tar.gz", hash = "sha256:83333726e83ed44b0ac94d8d7a21bbdee4a05029c3b1e8db58a863eec8fd8a33"}, + {file = "pyarrow-15.0.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:88b340f0a1d05b5ccc3d2d986279045655b1fe8e41aba6ca44ea28da0d1455d8"}, + {file = "pyarrow-15.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eaa8f96cecf32da508e6c7f69bb8401f03745c050c1dd42ec2596f2e98deecac"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c6753ed4f6adb8461e7c383e418391b8d8453c5d67e17f416c3a5d5709afbd"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f639c059035011db8c0497e541a8a45d98a58dbe34dc8fadd0ef128f2cee46e5"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:290e36a59a0993e9a5224ed2fb3e53375770f07379a0ea03ee2fce2e6d30b423"}, + {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06c2bb2a98bc792f040bef31ad3e9be6a63d0cb39189227c08a7d955db96816e"}, + {file = "pyarrow-15.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:f7a197f3670606a960ddc12adbe8075cea5f707ad7bf0dffa09637fdbb89f76c"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5f8bc839ea36b1f99984c78e06e7a06054693dc2af8920f6fb416b5bca9944e4"}, + {file = "pyarrow-15.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f5e81dfb4e519baa6b4c80410421528c214427e77ca0ea9461eb4097c328fa33"}, + {file = 
"pyarrow-15.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4f240852b302a7af4646c8bfe9950c4691a419847001178662a98915fd7ee7"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e7d9cfb5a1e648e172428c7a42b744610956f3b70f524aa3a6c02a448ba853e"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2d4f905209de70c0eb5b2de6763104d5a9a37430f137678edfb9a675bac9cd98"}, + {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:90adb99e8ce5f36fbecbbc422e7dcbcbed07d985eed6062e459e23f9e71fd197"}, + {file = "pyarrow-15.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:b116e7fd7889294cbd24eb90cd9bdd3850be3738d61297855a71ac3b8124ee38"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:25335e6f1f07fdaa026a61c758ee7d19ce824a866b27bba744348fa73bb5a440"}, + {file = "pyarrow-15.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:90f19e976d9c3d8e73c80be84ddbe2f830b6304e4c576349d9360e335cd627fc"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a22366249bf5fd40ddacc4f03cd3160f2d7c247692945afb1899bab8a140ddfb"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a335198f886b07e4b5ea16d08ee06557e07db54a8400cc0d03c7f6a22f785f"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e6d459c0c22f0b9c810a3917a1de3ee704b021a5fb8b3bacf968eece6df098f"}, + {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:033b7cad32198754d93465dcfb71d0ba7cb7cd5c9afd7052cab7214676eec38b"}, + {file = "pyarrow-15.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:29850d050379d6e8b5a693098f4de7fd6a2bea4365bfd073d7c57c57b95041ee"}, + {file = "pyarrow-15.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:7167107d7fb6dcadb375b4b691b7e316f4368f39f6f45405a05535d7ad5e5058"}, + {file = 
"pyarrow-15.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e85241b44cc3d365ef950432a1b3bd44ac54626f37b2e3a0cc89c20e45dfd8bf"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248723e4ed3255fcd73edcecc209744d58a9ca852e4cf3d2577811b6d4b59818"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff3bdfe6f1b81ca5b73b70a8d482d37a766433823e0c21e22d1d7dde76ca33f"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:f3d77463dee7e9f284ef42d341689b459a63ff2e75cee2b9302058d0d98fe142"}, + {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:8c1faf2482fb89766e79745670cbca04e7018497d85be9242d5350cba21357e1"}, + {file = "pyarrow-15.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:28f3016958a8e45a1069303a4a4f6a7d4910643fc08adb1e2e4a7ff056272ad3"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:89722cb64286ab3d4daf168386f6968c126057b8c7ec3ef96302e81d8cdb8ae4"}, + {file = "pyarrow-15.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd0ba387705044b3ac77b1b317165c0498299b08261d8122c96051024f953cd5"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad2459bf1f22b6a5cdcc27ebfd99307d5526b62d217b984b9f5c974651398832"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58922e4bfece8b02abf7159f1f53a8f4d9f8e08f2d988109126c17c3bb261f22"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:adccc81d3dc0478ea0b498807b39a8d41628fa9210729b2f718b78cb997c7c91"}, + {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8bd2baa5fe531571847983f36a30ddbf65261ef23e496862ece83bdceb70420d"}, + {file = "pyarrow-15.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6669799a1d4ca9da9c7e06ef48368320f5856f36f9a4dd31a11839dda3f6cc8c"}, + {file = "pyarrow-15.0.2.tar.gz", hash = 
"sha256:9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9"}, ] [package.dependencies] -numpy = ">=1.16.6" +numpy = ">=1.16.6,<2" [[package]] name = "pycparser" @@ -1403,13 +1415,13 @@ certifi = "*" [[package]] name = "pytest" -version = "7.4.4" +version = "8.2.0" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, - {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, ] [package.dependencies] @@ -1417,21 +1429,21 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-cov" -version = "4.1.0" +version = "5.0.0" description = "Pytest plugin for measuring coverage." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, - {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, ] [package.dependencies] @@ -1439,17 +1451,17 @@ coverage = {version = ">=5.2.1", extras = ["toml"]} pytest = ">=4.6" [package.extras] -testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "python-dateutil" -version = "2.8.2" +version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] [package.dependencies] @@ -1491,104 +1503,99 @@ files = [ [[package]] name = "pyzmq" -version = "25.1.2" +version = "26.0.3" description = "Python bindings for 0MQ" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_15_universal2.whl", hash = 
"sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4"}, - {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49151b0efece79f6a79d41a461d78535356136ee70084a1c22532fc6383f4ad0"}, - {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9a5f194cf730f2b24d6af1f833c14c10f41023da46a7f736f48b6d35061e76e"}, - {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:faf79a302f834d9e8304fafdc11d0d042266667ac45209afa57e5efc998e3872"}, - {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f51a7b4ead28d3fca8dda53216314a553b0f7a91ee8fc46a72b402a78c3e43d"}, - {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0ddd6d71d4ef17ba5a87becf7ddf01b371eaba553c603477679ae817a8d84d75"}, - {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:246747b88917e4867e2367b005fc8eefbb4a54b7db363d6c92f89d69abfff4b6"}, - {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:00c48ae2fd81e2a50c3485de1b9d5c7c57cd85dc8ec55683eac16846e57ac979"}, - {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a68d491fc20762b630e5db2191dd07ff89834086740f70e978bb2ef2668be08"}, - {file = "pyzmq-25.1.2-cp310-cp310-win32.whl", hash = "sha256:09dfe949e83087da88c4a76767df04b22304a682d6154de2c572625c62ad6886"}, - {file = "pyzmq-25.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:fa99973d2ed20417744fca0073390ad65ce225b546febb0580358e36aa90dba6"}, - {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:82544e0e2d0c1811482d37eef297020a040c32e0687c1f6fc23a75b75db8062c"}, - {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:01171fc48542348cd1a360a4b6c3e7d8f46cdcf53a8d40f84db6707a6768acc1"}, - {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:bc69c96735ab501419c432110016329bf0dea8898ce16fab97c6d9106dc0b348"}, - {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3e124e6b1dd3dfbeb695435dff0e383256655bb18082e094a8dd1f6293114642"}, - {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7598d2ba821caa37a0f9d54c25164a4fa351ce019d64d0b44b45540950458840"}, - {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d1299d7e964c13607efd148ca1f07dcbf27c3ab9e125d1d0ae1d580a1682399d"}, - {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4e6f689880d5ad87918430957297c975203a082d9a036cc426648fcbedae769b"}, - {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cc69949484171cc961e6ecd4a8911b9ce7a0d1f738fcae717177c231bf77437b"}, - {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9880078f683466b7f567b8624bfc16cad65077be046b6e8abb53bed4eeb82dd3"}, - {file = "pyzmq-25.1.2-cp311-cp311-win32.whl", hash = "sha256:4e5837af3e5aaa99a091302df5ee001149baff06ad22b722d34e30df5f0d9097"}, - {file = "pyzmq-25.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:25c2dbb97d38b5ac9fd15586e048ec5eb1e38f3d47fe7d92167b0c77bb3584e9"}, - {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:11e70516688190e9c2db14fcf93c04192b02d457b582a1f6190b154691b4c93a"}, - {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:313c3794d650d1fccaaab2df942af9f2c01d6217c846177cfcbc693c7410839e"}, - {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3cbba2f47062b85fe0ef9de5b987612140a9ba3a9c6d2543c6dec9f7c2ab27"}, - {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc31baa0c32a2ca660784d5af3b9487e13b61b3032cb01a115fce6588e1bed30"}, - {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:02c9087b109070c5ab0b383079fa1b5f797f8d43e9a66c07a4b8b8bdecfd88ee"}, - {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f8429b17cbb746c3e043cb986328da023657e79d5ed258b711c06a70c2ea7537"}, - {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5074adeacede5f810b7ef39607ee59d94e948b4fd954495bdb072f8c54558181"}, - {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7ae8f354b895cbd85212da245f1a5ad8159e7840e37d78b476bb4f4c3f32a9fe"}, - {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b264bf2cc96b5bc43ce0e852be995e400376bd87ceb363822e2cb1964fcdc737"}, - {file = "pyzmq-25.1.2-cp312-cp312-win32.whl", hash = "sha256:02bbc1a87b76e04fd780b45e7f695471ae6de747769e540da909173d50ff8e2d"}, - {file = "pyzmq-25.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:ced111c2e81506abd1dc142e6cd7b68dd53747b3b7ae5edbea4578c5eeff96b7"}, - {file = "pyzmq-25.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7b6d09a8962a91151f0976008eb7b29b433a560fde056ec7a3db9ec8f1075438"}, - {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967668420f36878a3c9ecb5ab33c9d0ff8d054f9c0233d995a6d25b0e95e1b6b"}, - {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5edac3f57c7ddaacdb4d40f6ef2f9e299471fc38d112f4bc6d60ab9365445fb0"}, - {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0dabfb10ef897f3b7e101cacba1437bd3a5032ee667b7ead32bbcdd1a8422fe7"}, - {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2c6441e0398c2baacfe5ba30c937d274cfc2dc5b55e82e3749e333aabffde561"}, - {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:16b726c1f6c2e7625706549f9dbe9b06004dfbec30dbed4bf50cbdfc73e5b32a"}, - {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:a86c2dd76ef71a773e70551a07318b8e52379f58dafa7ae1e0a4be78efd1ff16"}, - 
{file = "pyzmq-25.1.2-cp36-cp36m-win32.whl", hash = "sha256:359f7f74b5d3c65dae137f33eb2bcfa7ad9ebefd1cab85c935f063f1dbb245cc"}, - {file = "pyzmq-25.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:55875492f820d0eb3417b51d96fea549cde77893ae3790fd25491c5754ea2f68"}, - {file = "pyzmq-25.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8c8a419dfb02e91b453615c69568442e897aaf77561ee0064d789705ff37a92"}, - {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8807c87fa893527ae8a524c15fc505d9950d5e856f03dae5921b5e9aa3b8783b"}, - {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5e319ed7d6b8f5fad9b76daa0a68497bc6f129858ad956331a5835785761e003"}, - {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3c53687dde4d9d473c587ae80cc328e5b102b517447456184b485587ebd18b62"}, - {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9add2e5b33d2cd765ad96d5eb734a5e795a0755f7fc49aa04f76d7ddda73fd70"}, - {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e690145a8c0c273c28d3b89d6fb32c45e0d9605b2293c10e650265bf5c11cfec"}, - {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b"}, - {file = "pyzmq-25.1.2-cp37-cp37m-win32.whl", hash = "sha256:0f97bc2f1f13cb16905a5f3e1fbdf100e712d841482b2237484360f8bc4cb3d7"}, - {file = "pyzmq-25.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6cc0020b74b2e410287e5942e1e10886ff81ac77789eb20bec13f7ae681f0fdd"}, - {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:bef02cfcbded83473bdd86dd8d3729cd82b2e569b75844fb4ea08fee3c26ae41"}, - {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e10a4b5a4b1192d74853cc71a5e9fd022594573926c2a3a4802020360aa719d8"}, - {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:8c5f80e578427d4695adac6fdf4370c14a2feafdc8cb35549c219b90652536ae"}, - {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5dde6751e857910c1339890f3524de74007958557593b9e7e8c5f01cd919f8a7"}, - {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea1608dd169da230a0ad602d5b1ebd39807ac96cae1845c3ceed39af08a5c6df"}, - {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0f513130c4c361201da9bc69df25a086487250e16b5571ead521b31ff6b02220"}, - {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:019744b99da30330798bb37df33549d59d380c78e516e3bab9c9b84f87a9592f"}, - {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2e2713ef44be5d52dd8b8e2023d706bf66cb22072e97fc71b168e01d25192755"}, - {file = "pyzmq-25.1.2-cp38-cp38-win32.whl", hash = "sha256:07cd61a20a535524906595e09344505a9bd46f1da7a07e504b315d41cd42eb07"}, - {file = "pyzmq-25.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb7e49a17fb8c77d3119d41a4523e432eb0c6932187c37deb6fbb00cc3028088"}, - {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:94504ff66f278ab4b7e03e4cba7e7e400cb73bfa9d3d71f58d8972a8dc67e7a6"}, - {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6dd0d50bbf9dca1d0bdea219ae6b40f713a3fb477c06ca3714f208fd69e16fd8"}, - {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565"}, - {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c0b5ca88a8928147b7b1e2dfa09f3b6c256bc1135a1338536cbc9ea13d3b7add"}, - {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9a79f1d2495b167119d02be7448bfba57fad2a4207c4f68abc0bab4b92925b"}, - {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:518efd91c3d8ac9f9b4f7dd0e2b7b8bf1a4fe82a308009016b07eaa48681af82"}, - {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1ec23bd7b3a893ae676d0e54ad47d18064e6c5ae1fadc2f195143fb27373f7f6"}, - {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db36c27baed588a5a8346b971477b718fdc66cf5b80cbfbd914b4d6d355e44e2"}, - {file = "pyzmq-25.1.2-cp39-cp39-win32.whl", hash = "sha256:39b1067f13aba39d794a24761e385e2eddc26295826530a8c7b6c6c341584289"}, - {file = "pyzmq-25.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:8e9f3fabc445d0ce320ea2c59a75fe3ea591fdbdeebec5db6de530dd4b09412e"}, - {file = "pyzmq-25.1.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a8c1d566344aee826b74e472e16edae0a02e2a044f14f7c24e123002dcff1c05"}, - {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:759cfd391a0996345ba94b6a5110fca9c557ad4166d86a6e81ea526c376a01e8"}, - {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c61e346ac34b74028ede1c6b4bcecf649d69b707b3ff9dc0fab453821b04d1e"}, - {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cb8fc1f8d69b411b8ec0b5f1ffbcaf14c1db95b6bccea21d83610987435f1a4"}, - {file = "pyzmq-25.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3c00c9b7d1ca8165c610437ca0c92e7b5607b2f9076f4eb4b095c85d6e680a1d"}, - {file = "pyzmq-25.1.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:df0c7a16ebb94452d2909b9a7b3337940e9a87a824c4fc1c7c36bb4404cb0cde"}, - {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:45999e7f7ed5c390f2e87ece7f6c56bf979fb213550229e711e45ecc7d42ccb8"}, - {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ac170e9e048b40c605358667aca3d94e98f604a18c44bdb4c102e67070f3ac9b"}, - {file = 
"pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b604734bec94f05f81b360a272fc824334267426ae9905ff32dc2be433ab96"}, - {file = "pyzmq-25.1.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a793ac733e3d895d96f865f1806f160696422554e46d30105807fdc9841b9f7d"}, - {file = "pyzmq-25.1.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0806175f2ae5ad4b835ecd87f5f85583316b69f17e97786f7443baaf54b9bb98"}, - {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ef12e259e7bc317c7597d4f6ef59b97b913e162d83b421dd0db3d6410f17a244"}, - {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea253b368eb41116011add00f8d5726762320b1bda892f744c91997b65754d73"}, - {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b9b1f2ad6498445a941d9a4fee096d387fee436e45cc660e72e768d3d8ee611"}, - {file = "pyzmq-25.1.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8b14c75979ce932c53b79976a395cb2a8cd3aaf14aef75e8c2cb55a330b9b49d"}, - {file = "pyzmq-25.1.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:889370d5174a741a62566c003ee8ddba4b04c3f09a97b8000092b7ca83ec9c49"}, - {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a18fff090441a40ffda8a7f4f18f03dc56ae73f148f1832e109f9bffa85df15"}, - {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99a6b36f95c98839ad98f8c553d8507644c880cf1e0a57fe5e3a3f3969040882"}, - {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4345c9a27f4310afbb9c01750e9461ff33d6fb74cd2456b107525bbeebcb5be3"}, - {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3516e0b6224cf6e43e341d56da15fd33bdc37fa0c06af4f029f7d7dfceceabbc"}, - {file = "pyzmq-25.1.2-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:146b9b1f29ead41255387fb07be56dc29639262c0f7344f570eecdcd8d683314"}, - {file = "pyzmq-25.1.2.tar.gz", hash = "sha256:93f1aa311e8bb912e34f004cf186407a4e90eec4f0ecc0efd26056bf7eda0226"}, + {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:44dd6fc3034f1eaa72ece33588867df9e006a7303725a12d64c3dff92330f625"}, + {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acb704195a71ac5ea5ecf2811c9ee19ecdc62b91878528302dd0be1b9451cc90"}, + {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dbb9c997932473a27afa93954bb77a9f9b786b4ccf718d903f35da3232317de"}, + {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6bcb34f869d431799c3ee7d516554797f7760cb2198ecaa89c3f176f72d062be"}, + {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ece17ec5f20d7d9b442e5174ae9f020365d01ba7c112205a4d59cf19dc38ee"}, + {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ba6e5e6588e49139a0979d03a7deb9c734bde647b9a8808f26acf9c547cab1bf"}, + {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3bf8b000a4e2967e6dfdd8656cd0757d18c7e5ce3d16339e550bd462f4857e59"}, + {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2136f64fbb86451dbbf70223635a468272dd20075f988a102bf8a3f194a411dc"}, + {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e8918973fbd34e7814f59143c5f600ecd38b8038161239fd1a3d33d5817a38b8"}, + {file = "pyzmq-26.0.3-cp310-cp310-win32.whl", hash = "sha256:0aaf982e68a7ac284377d051c742610220fd06d330dcd4c4dbb4cdd77c22a537"}, + {file = "pyzmq-26.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:f1a9b7d00fdf60b4039f4455afd031fe85ee8305b019334b72dcf73c567edc47"}, + {file = "pyzmq-26.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:80b12f25d805a919d53efc0a5ad7c0c0326f13b4eae981a5d7b7cc343318ebb7"}, + {file = 
"pyzmq-26.0.3-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:a72a84570f84c374b4c287183debc776dc319d3e8ce6b6a0041ce2e400de3f32"}, + {file = "pyzmq-26.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ca684ee649b55fd8f378127ac8462fb6c85f251c2fb027eb3c887e8ee347bcd"}, + {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e222562dc0f38571c8b1ffdae9d7adb866363134299264a1958d077800b193b7"}, + {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f17cde1db0754c35a91ac00b22b25c11da6eec5746431d6e5092f0cd31a3fea9"}, + {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7c0c0b3244bb2275abe255d4a30c050d541c6cb18b870975553f1fb6f37527"}, + {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac97a21de3712afe6a6c071abfad40a6224fd14fa6ff0ff8d0c6e6cd4e2f807a"}, + {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:88b88282e55fa39dd556d7fc04160bcf39dea015f78e0cecec8ff4f06c1fc2b5"}, + {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:72b67f966b57dbd18dcc7efbc1c7fc9f5f983e572db1877081f075004614fcdd"}, + {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f4b6cecbbf3b7380f3b61de3a7b93cb721125dc125c854c14ddc91225ba52f83"}, + {file = "pyzmq-26.0.3-cp311-cp311-win32.whl", hash = "sha256:eed56b6a39216d31ff8cd2f1d048b5bf1700e4b32a01b14379c3b6dde9ce3aa3"}, + {file = "pyzmq-26.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:3191d312c73e3cfd0f0afdf51df8405aafeb0bad71e7ed8f68b24b63c4f36500"}, + {file = "pyzmq-26.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:b6907da3017ef55139cf0e417c5123a84c7332520e73a6902ff1f79046cd3b94"}, + {file = "pyzmq-26.0.3-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:068ca17214038ae986d68f4a7021f97e187ed278ab6dccb79f837d765a54d753"}, + {file = "pyzmq-26.0.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:7821d44fe07335bea256b9f1f41474a642ca55fa671dfd9f00af8d68a920c2d4"}, + {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eeb438a26d87c123bb318e5f2b3d86a36060b01f22fbdffd8cf247d52f7c9a2b"}, + {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:69ea9d6d9baa25a4dc9cef5e2b77b8537827b122214f210dd925132e34ae9b12"}, + {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7daa3e1369355766dea11f1d8ef829905c3b9da886ea3152788dc25ee6079e02"}, + {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6ca7a9a06b52d0e38ccf6bca1aeff7be178917893f3883f37b75589d42c4ac20"}, + {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1b7d0e124948daa4d9686d421ef5087c0516bc6179fdcf8828b8444f8e461a77"}, + {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e746524418b70f38550f2190eeee834db8850088c834d4c8406fbb9bc1ae10b2"}, + {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:6b3146f9ae6af82c47a5282ac8803523d381b3b21caeae0327ed2f7ecb718798"}, + {file = "pyzmq-26.0.3-cp312-cp312-win32.whl", hash = "sha256:2b291d1230845871c00c8462c50565a9cd6026fe1228e77ca934470bb7d70ea0"}, + {file = "pyzmq-26.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:926838a535c2c1ea21c903f909a9a54e675c2126728c21381a94ddf37c3cbddf"}, + {file = "pyzmq-26.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:5bf6c237f8c681dfb91b17f8435b2735951f0d1fad10cc5dfd96db110243370b"}, + {file = "pyzmq-26.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c0991f5a96a8e620f7691e61178cd8f457b49e17b7d9cfa2067e2a0a89fc1d5"}, + {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dbf012d8fcb9f2cf0643b65df3b355fdd74fc0035d70bb5c845e9e30a3a4654b"}, + {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:01fbfbeb8249a68d257f601deb50c70c929dc2dfe683b754659569e502fbd3aa"}, + {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c8eb19abe87029c18f226d42b8a2c9efdd139d08f8bf6e085dd9075446db450"}, + {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5344b896e79800af86ad643408ca9aa303a017f6ebff8cee5a3163c1e9aec987"}, + {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:204e0f176fd1d067671157d049466869b3ae1fc51e354708b0dc41cf94e23a3a"}, + {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a42db008d58530efa3b881eeee4991146de0b790e095f7ae43ba5cc612decbc5"}, + {file = "pyzmq-26.0.3-cp37-cp37m-win32.whl", hash = "sha256:8d7a498671ca87e32b54cb47c82a92b40130a26c5197d392720a1bce1b3c77cf"}, + {file = "pyzmq-26.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:3b4032a96410bdc760061b14ed6a33613ffb7f702181ba999df5d16fb96ba16a"}, + {file = "pyzmq-26.0.3-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:2cc4e280098c1b192c42a849de8de2c8e0f3a84086a76ec5b07bfee29bda7d18"}, + {file = "pyzmq-26.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5bde86a2ed3ce587fa2b207424ce15b9a83a9fa14422dcc1c5356a13aed3df9d"}, + {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:34106f68e20e6ff253c9f596ea50397dbd8699828d55e8fa18bd4323d8d966e6"}, + {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ebbbd0e728af5db9b04e56389e2299a57ea8b9dd15c9759153ee2455b32be6ad"}, + {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6b1d1c631e5940cac5a0b22c5379c86e8df6a4ec277c7a856b714021ab6cfad"}, + {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e891ce81edd463b3b4c3b885c5603c00141151dd9c6936d98a680c8c72fe5c67"}, + {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:9b273ecfbc590a1b98f014ae41e5cf723932f3b53ba9367cfb676f838038b32c"}, + {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b32bff85fb02a75ea0b68f21e2412255b5731f3f389ed9aecc13a6752f58ac97"}, + {file = "pyzmq-26.0.3-cp38-cp38-win32.whl", hash = "sha256:f6c21c00478a7bea93caaaef9e7629145d4153b15a8653e8bb4609d4bc70dbfc"}, + {file = "pyzmq-26.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:3401613148d93ef0fd9aabdbddb212de3db7a4475367f49f590c837355343972"}, + {file = "pyzmq-26.0.3-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:2ed8357f4c6e0daa4f3baf31832df8a33334e0fe5b020a61bc8b345a3db7a606"}, + {file = "pyzmq-26.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c1c8f2a2ca45292084c75bb6d3a25545cff0ed931ed228d3a1810ae3758f975f"}, + {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:b63731993cdddcc8e087c64e9cf003f909262b359110070183d7f3025d1c56b5"}, + {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b3cd31f859b662ac5d7f4226ec7d8bd60384fa037fc02aee6ff0b53ba29a3ba8"}, + {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:115f8359402fa527cf47708d6f8a0f8234f0e9ca0cab7c18c9c189c194dbf620"}, + {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:715bdf952b9533ba13dfcf1f431a8f49e63cecc31d91d007bc1deb914f47d0e4"}, + {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e1258c639e00bf5e8a522fec6c3eaa3e30cf1c23a2f21a586be7e04d50c9acab"}, + {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:15c59e780be8f30a60816a9adab900c12a58d79c1ac742b4a8df044ab2a6d920"}, + {file = "pyzmq-26.0.3-cp39-cp39-win32.whl", hash = "sha256:d0cdde3c78d8ab5b46595054e5def32a755fc028685add5ddc7403e9f6de9879"}, + {file = "pyzmq-26.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:ce828058d482ef860746bf532822842e0ff484e27f540ef5c813d516dd8896d2"}, + {file = 
"pyzmq-26.0.3-cp39-cp39-win_arm64.whl", hash = "sha256:788f15721c64109cf720791714dc14afd0f449d63f3a5487724f024345067381"}, + {file = "pyzmq-26.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c18645ef6294d99b256806e34653e86236eb266278c8ec8112622b61db255de"}, + {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e6bc96ebe49604df3ec2c6389cc3876cabe475e6bfc84ced1bf4e630662cb35"}, + {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:971e8990c5cc4ddcff26e149398fc7b0f6a042306e82500f5e8db3b10ce69f84"}, + {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8416c23161abd94cc7da80c734ad7c9f5dbebdadfdaa77dad78244457448223"}, + {file = "pyzmq-26.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:082a2988364b60bb5de809373098361cf1dbb239623e39e46cb18bc035ed9c0c"}, + {file = "pyzmq-26.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d57dfbf9737763b3a60d26e6800e02e04284926329aee8fb01049635e957fe81"}, + {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:77a85dca4c2430ac04dc2a2185c2deb3858a34fe7f403d0a946fa56970cf60a1"}, + {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4c82a6d952a1d555bf4be42b6532927d2a5686dd3c3e280e5f63225ab47ac1f5"}, + {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4496b1282c70c442809fc1b151977c3d967bfb33e4e17cedbf226d97de18f709"}, + {file = "pyzmq-26.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:e4946d6bdb7ba972dfda282f9127e5756d4f299028b1566d1245fa0d438847e6"}, + {file = "pyzmq-26.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:03c0ae165e700364b266876d712acb1ac02693acd920afa67da2ebb91a0b3c09"}, + {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:3e3070e680f79887d60feeda051a58d0ac36622e1759f305a41059eff62c6da7"}, + {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6ca08b840fe95d1c2bd9ab92dac5685f949fc6f9ae820ec16193e5ddf603c3b2"}, + {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e76654e9dbfb835b3518f9938e565c7806976c07b37c33526b574cc1a1050480"}, + {file = "pyzmq-26.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:871587bdadd1075b112e697173e946a07d722459d20716ceb3d1bd6c64bd08ce"}, + {file = "pyzmq-26.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d0a2d1bd63a4ad79483049b26514e70fa618ce6115220da9efdff63688808b17"}, + {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0270b49b6847f0d106d64b5086e9ad5dc8a902413b5dbbb15d12b60f9c1747a4"}, + {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:703c60b9910488d3d0954ca585c34f541e506a091a41930e663a098d3b794c67"}, + {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74423631b6be371edfbf7eabb02ab995c2563fee60a80a30829176842e71722a"}, + {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4adfbb5451196842a88fda3612e2c0414134874bffb1c2ce83ab4242ec9e027d"}, + {file = "pyzmq-26.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3516119f4f9b8671083a70b6afaa0a070f5683e431ab3dc26e9215620d7ca1ad"}, + {file = "pyzmq-26.0.3.tar.gz", hash = "sha256:dba7d9f2e047dfa2bca3b01f4f84aa5246725203d6284e3790f2ca15fba6b40a"}, ] [package.dependencies] @@ -1596,37 +1603,37 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} [[package]] name = "scikit-learn" -version = "1.4.1.post1" +version = "1.4.2" description = "A set of python modules for machine learning and data mining" optional = false python-versions = ">=3.9" files = [ - {file = 
"scikit-learn-1.4.1.post1.tar.gz", hash = "sha256:93d3d496ff1965470f9977d05e5ec3376fb1e63b10e4fda5e39d23c2d8969a30"}, - {file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c540aaf44729ab5cd4bd5e394f2b375e65ceaea9cdd8c195788e70433d91bbc5"}, - {file = "scikit_learn-1.4.1.post1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4310bff71aa98b45b46cd26fa641309deb73a5d1c0461d181587ad4f30ea3c36"}, - {file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f43dd527dabff5521af2786a2f8de5ba381e182ec7292663508901cf6ceaf6e"}, - {file = "scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c02e27d65b0c7dc32f2c5eb601aaf5530b7a02bfbe92438188624524878336f2"}, - {file = "scikit_learn-1.4.1.post1-cp310-cp310-win_amd64.whl", hash = "sha256:629e09f772ad42f657ca60a1a52342eef786218dd20cf1369a3b8d085e55ef8f"}, - {file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6145dfd9605b0b50ae72cdf72b61a2acd87501369a763b0d73d004710ebb76b5"}, - {file = "scikit_learn-1.4.1.post1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1afed6951bc9d2053c6ee9a518a466cbc9b07c6a3f9d43bfe734192b6125d508"}, - {file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce03506ccf5f96b7e9030fea7eb148999b254c44c10182ac55857bc9b5d4815f"}, - {file = "scikit_learn-1.4.1.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ba516fcdc73d60e7f48cbb0bccb9acbdb21807de3651531208aac73c758e3ab"}, - {file = "scikit_learn-1.4.1.post1-cp311-cp311-win_amd64.whl", hash = "sha256:78cd27b4669513b50db4f683ef41ea35b5dddc797bd2bbd990d49897fd1c8a46"}, - {file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a1e289f33f613cefe6707dead50db31930530dc386b6ccff176c786335a7b01c"}, - {file = "scikit_learn-1.4.1.post1-cp312-cp312-macosx_12_0_arm64.whl", hash = 
"sha256:0df87de9ce1c0140f2818beef310fb2e2afdc1e66fc9ad587965577f17733649"}, - {file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:712c1c69c45b58ef21635360b3d0a680ff7d83ac95b6f9b82cf9294070cda710"}, - {file = "scikit_learn-1.4.1.post1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1754b0c2409d6ed5a3380512d0adcf182a01363c669033a2b55cca429ed86a81"}, - {file = "scikit_learn-1.4.1.post1-cp312-cp312-win_amd64.whl", hash = "sha256:1d491ef66e37f4e812db7e6c8286520c2c3fc61b34bf5e59b67b4ce528de93af"}, - {file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:aa0029b78ef59af22cfbd833e8ace8526e4df90212db7ceccbea582ebb5d6794"}, - {file = "scikit_learn-1.4.1.post1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:14e4c88436ac96bf69eb6d746ac76a574c314a23c6961b7d344b38877f20fee1"}, - {file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7cd3a77c32879311f2aa93466d3c288c955ef71d191503cf0677c3340ae8ae0"}, - {file = "scikit_learn-1.4.1.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a3ee19211ded1a52ee37b0a7b373a8bfc66f95353af058a210b692bd4cda0dd"}, - {file = "scikit_learn-1.4.1.post1-cp39-cp39-win_amd64.whl", hash = "sha256:234b6bda70fdcae9e4abbbe028582ce99c280458665a155eed0b820599377d25"}, + {file = "scikit-learn-1.4.2.tar.gz", hash = "sha256:daa1c471d95bad080c6e44b4946c9390a4842adc3082572c20e4f8884e39e959"}, + {file = "scikit_learn-1.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8539a41b3d6d1af82eb629f9c57f37428ff1481c1e34dddb3b9d7af8ede67ac5"}, + {file = "scikit_learn-1.4.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:68b8404841f944a4a1459b07198fa2edd41a82f189b44f3e1d55c104dbc2e40c"}, + {file = "scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81bf5d8bbe87643103334032dd82f7419bc8c8d02a763643a6b9a5c7288c5054"}, + {file 
= "scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36f0ea5d0f693cb247a073d21a4123bdf4172e470e6d163c12b74cbb1536cf38"}, + {file = "scikit_learn-1.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:87440e2e188c87db80ea4023440923dccbd56fbc2d557b18ced00fef79da0727"}, + {file = "scikit_learn-1.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:45dee87ac5309bb82e3ea633955030df9bbcb8d2cdb30383c6cd483691c546cc"}, + {file = "scikit_learn-1.4.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1d0b25d9c651fd050555aadd57431b53d4cf664e749069da77f3d52c5ad14b3b"}, + {file = "scikit_learn-1.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0203c368058ab92efc6168a1507d388d41469c873e96ec220ca8e74079bf62e"}, + {file = "scikit_learn-1.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44c62f2b124848a28fd695db5bc4da019287abf390bfce602ddc8aa1ec186aae"}, + {file = "scikit_learn-1.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:5cd7b524115499b18b63f0c96f4224eb885564937a0b3477531b2b63ce331904"}, + {file = "scikit_learn-1.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:90378e1747949f90c8f385898fff35d73193dfcaec3dd75d6b542f90c4e89755"}, + {file = "scikit_learn-1.4.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ff4effe5a1d4e8fed260a83a163f7dbf4f6087b54528d8880bab1d1377bd78be"}, + {file = "scikit_learn-1.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:671e2f0c3f2c15409dae4f282a3a619601fa824d2c820e5b608d9d775f91780c"}, + {file = "scikit_learn-1.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d36d0bc983336bbc1be22f9b686b50c964f593c8a9a913a792442af9bf4f5e68"}, + {file = "scikit_learn-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:d762070980c17ba3e9a4a1e043ba0518ce4c55152032f1af0ca6f39b376b5928"}, + {file = "scikit_learn-1.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:d9993d5e78a8148b1d0fdf5b15ed92452af5581734129998c26f481c46586d68"}, + {file = "scikit_learn-1.4.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:426d258fddac674fdf33f3cb2d54d26f49406e2599dbf9a32b4d1696091d4256"}, + {file = "scikit_learn-1.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5460a1a5b043ae5ae4596b3126a4ec33ccba1b51e7ca2c5d36dac2169f62ab1d"}, + {file = "scikit_learn-1.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49d64ef6cb8c093d883e5a36c4766548d974898d378e395ba41a806d0e824db8"}, + {file = "scikit_learn-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:c97a50b05c194be9146d61fe87dbf8eac62b203d9e87a3ccc6ae9aed2dfaf361"}, ] [package.dependencies] joblib = ">=1.2.0" -numpy = ">=1.19.5,<2.0" +numpy = ">=1.19.5" scipy = ">=1.6.0" threadpoolctl = ">=2.0.0" @@ -1680,56 +1687,56 @@ test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "po [[package]] name = "shapely" -version = "2.0.3" +version = "2.0.4" description = "Manipulation and analysis of geometric objects" optional = false python-versions = ">=3.7" files = [ - {file = "shapely-2.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:af7e9abe180b189431b0f490638281b43b84a33a960620e6b2e8d3e3458b61a1"}, - {file = "shapely-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:98040462b36ced9671e266b95c326b97f41290d9d17504a1ee4dc313a7667b9c"}, - {file = "shapely-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:71eb736ef2843f23473c6e37f6180f90f0a35d740ab284321548edf4e55d9a52"}, - {file = "shapely-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:881eb9dbbb4a6419667e91fcb20313bfc1e67f53dbb392c6840ff04793571ed1"}, - {file = "shapely-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f10d2ccf0554fc0e39fad5886c839e47e207f99fdf09547bc687a2330efda35b"}, - {file = "shapely-2.0.3-cp310-cp310-win32.whl", hash = 
"sha256:6dfdc077a6fcaf74d3eab23a1ace5abc50c8bce56ac7747d25eab582c5a2990e"}, - {file = "shapely-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:64c5013dacd2d81b3bb12672098a0b2795c1bf8190cfc2980e380f5ef9d9e4d9"}, - {file = "shapely-2.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:56cee3e4e8159d6f2ce32e421445b8e23154fd02a0ac271d6a6c0b266a8e3cce"}, - {file = "shapely-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:619232c8276fded09527d2a9fd91a7885ff95c0ff9ecd5e3cb1e34fbb676e2ae"}, - {file = "shapely-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2a7d256db6f5b4b407dc0c98dd1b2fcf1c9c5814af9416e5498d0a2e4307a4b"}, - {file = "shapely-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e45f0c8cd4583647db3216d965d49363e6548c300c23fd7e57ce17a03f824034"}, - {file = "shapely-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13cb37d3826972a82748a450328fe02a931dcaed10e69a4d83cc20ba021bc85f"}, - {file = "shapely-2.0.3-cp311-cp311-win32.whl", hash = "sha256:9302d7011e3e376d25acd30d2d9e70d315d93f03cc748784af19b00988fc30b1"}, - {file = "shapely-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6b464f2666b13902835f201f50e835f2f153f37741db88f68c7f3b932d3505fa"}, - {file = "shapely-2.0.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e86e7cb8e331a4850e0c2a8b2d66dc08d7a7b301b8d1d34a13060e3a5b4b3b55"}, - {file = "shapely-2.0.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c91981c99ade980fc49e41a544629751a0ccd769f39794ae913e53b07b2f78b9"}, - {file = "shapely-2.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd45d456983dc60a42c4db437496d3f08a4201fbf662b69779f535eb969660af"}, - {file = "shapely-2.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:882fb1ffc7577e88c1194f4f1757e277dc484ba096a3b94844319873d14b0f2d"}, - {file = "shapely-2.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b9f2d93bff2ea52fa93245798cddb479766a18510ea9b93a4fb9755c79474889"}, - {file = "shapely-2.0.3-cp312-cp312-win32.whl", hash = "sha256:99abad1fd1303b35d991703432c9481e3242b7b3a393c186cfb02373bf604004"}, - {file = "shapely-2.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:6f555fe3304a1f40398977789bc4fe3c28a11173196df9ece1e15c5bc75a48db"}, - {file = "shapely-2.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a983cc418c1fa160b7d797cfef0e0c9f8c6d5871e83eae2c5793fce6a837fad9"}, - {file = "shapely-2.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18bddb8c327f392189a8d5d6b9a858945722d0bb95ccbd6a077b8e8fc4c7890d"}, - {file = "shapely-2.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:442f4dcf1eb58c5a4e3428d88e988ae153f97ab69a9f24e07bf4af8038536325"}, - {file = "shapely-2.0.3-cp37-cp37m-win32.whl", hash = "sha256:31a40b6e3ab00a4fd3a1d44efb2482278642572b8e0451abdc8e0634b787173e"}, - {file = "shapely-2.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:59b16976c2473fec85ce65cc9239bef97d4205ab3acead4e6cdcc72aee535679"}, - {file = "shapely-2.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:705efbce1950a31a55b1daa9c6ae1c34f1296de71ca8427974ec2f27d57554e3"}, - {file = "shapely-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:601c5c0058a6192df704cb889439f64994708563f57f99574798721e9777a44b"}, - {file = "shapely-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f24ecbb90a45c962b3b60d8d9a387272ed50dc010bfe605f1d16dfc94772d8a1"}, - {file = "shapely-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c2a2989222c6062f7a0656e16276c01bb308bc7e5d999e54bf4e294ce62e76"}, - {file = "shapely-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42bceb9bceb3710a774ce04908fda0f28b291323da2688f928b3f213373b5aee"}, - {file = "shapely-2.0.3-cp38-cp38-win32.whl", hash = "sha256:54d925c9a311e4d109ec25f6a54a8bd92cc03481a34ae1a6a92c1fe6729b7e01"}, - {file = 
"shapely-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:300d203b480a4589adefff4c4af0b13919cd6d760ba3cbb1e56275210f96f654"}, - {file = "shapely-2.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:083d026e97b6c1f4a9bd2a9171c7692461092ed5375218170d91705550eecfd5"}, - {file = "shapely-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:27b6e1910094d93e9627f2664121e0e35613262fc037051680a08270f6058daf"}, - {file = "shapely-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:71b2de56a9e8c0e5920ae5ddb23b923490557ac50cb0b7fa752761bf4851acde"}, - {file = "shapely-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d279e56bbb68d218d63f3efc80c819cedcceef0e64efbf058a1df89dc57201b"}, - {file = "shapely-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88566d01a30f0453f7d038db46bc83ce125e38e47c5f6bfd4c9c287010e9bf74"}, - {file = "shapely-2.0.3-cp39-cp39-win32.whl", hash = "sha256:58afbba12c42c6ed44c4270bc0e22f3dadff5656d711b0ad335c315e02d04707"}, - {file = "shapely-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:5026b30433a70911979d390009261b8c4021ff87c7c3cbd825e62bb2ffa181bc"}, - {file = "shapely-2.0.3.tar.gz", hash = "sha256:4d65d0aa7910af71efa72fd6447e02a8e5dd44da81a983de9d736d6e6ccbe674"}, + {file = "shapely-2.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:011b77153906030b795791f2fdfa2d68f1a8d7e40bce78b029782ade3afe4f2f"}, + {file = "shapely-2.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9831816a5d34d5170aa9ed32a64982c3d6f4332e7ecfe62dc97767e163cb0b17"}, + {file = "shapely-2.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5c4849916f71dc44e19ed370421518c0d86cf73b26e8656192fcfcda08218fbd"}, + {file = "shapely-2.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841f93a0e31e4c64d62ea570d81c35de0f6cea224568b2430d832967536308e6"}, + {file = "shapely-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d2b4431f522b277c79c34b65da128029a9955e4481462cbf7ebec23aab61fc58"}, + {file = "shapely-2.0.4-cp310-cp310-win32.whl", hash = "sha256:92a41d936f7d6743f343be265ace93b7c57f5b231e21b9605716f5a47c2879e7"}, + {file = "shapely-2.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:30982f79f21bb0ff7d7d4a4e531e3fcaa39b778584c2ce81a147f95be1cd58c9"}, + {file = "shapely-2.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de0205cb21ad5ddaef607cda9a3191eadd1e7a62a756ea3a356369675230ac35"}, + {file = "shapely-2.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7d56ce3e2a6a556b59a288771cf9d091470116867e578bebced8bfc4147fbfd7"}, + {file = "shapely-2.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:58b0ecc505bbe49a99551eea3f2e8a9b3b24b3edd2a4de1ac0dc17bc75c9ec07"}, + {file = "shapely-2.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:790a168a808bd00ee42786b8ba883307c0e3684ebb292e0e20009588c426da47"}, + {file = "shapely-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4310b5494271e18580d61022c0857eb85d30510d88606fa3b8314790df7f367d"}, + {file = "shapely-2.0.4-cp311-cp311-win32.whl", hash = "sha256:63f3a80daf4f867bd80f5c97fbe03314348ac1b3b70fb1c0ad255a69e3749879"}, + {file = "shapely-2.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:c52ed79f683f721b69a10fb9e3d940a468203f5054927215586c5d49a072de8d"}, + {file = "shapely-2.0.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5bbd974193e2cc274312da16b189b38f5f128410f3377721cadb76b1e8ca5328"}, + {file = "shapely-2.0.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:41388321a73ba1a84edd90d86ecc8bfed55e6a1e51882eafb019f45895ec0f65"}, + {file = "shapely-2.0.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0776c92d584f72f1e584d2e43cfc5542c2f3dd19d53f70df0900fda643f4bae6"}, + {file = "shapely-2.0.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c75c98380b1ede1cae9a252c6dc247e6279403fae38c77060a5e6186c95073ac"}, 
+ {file = "shapely-2.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3e700abf4a37b7b8b90532fa6ed5c38a9bfc777098bc9fbae5ec8e618ac8f30"}, + {file = "shapely-2.0.4-cp312-cp312-win32.whl", hash = "sha256:4f2ab0faf8188b9f99e6a273b24b97662194160cc8ca17cf9d1fb6f18d7fb93f"}, + {file = "shapely-2.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:03152442d311a5e85ac73b39680dd64a9892fa42bb08fd83b3bab4fe6999bfa0"}, + {file = "shapely-2.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:994c244e004bc3cfbea96257b883c90a86e8cbd76e069718eb4c6b222a56f78b"}, + {file = "shapely-2.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05ffd6491e9e8958b742b0e2e7c346635033d0a5f1a0ea083547fcc854e5d5cf"}, + {file = "shapely-2.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbdc1140a7d08faa748256438291394967aa54b40009f54e8d9825e75ef6113"}, + {file = "shapely-2.0.4-cp37-cp37m-win32.whl", hash = "sha256:5af4cd0d8cf2912bd95f33586600cac9c4b7c5053a036422b97cfe4728d2eb53"}, + {file = "shapely-2.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:464157509ce4efa5ff285c646a38b49f8c5ef8d4b340f722685b09bb033c5ccf"}, + {file = "shapely-2.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:489c19152ec1f0e5c5e525356bcbf7e532f311bff630c9b6bc2db6f04da6a8b9"}, + {file = "shapely-2.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b79bbd648664aa6f44ef018474ff958b6b296fed5c2d42db60078de3cffbc8aa"}, + {file = "shapely-2.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:674d7baf0015a6037d5758496d550fc1946f34bfc89c1bf247cabdc415d7747e"}, + {file = "shapely-2.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cd4ccecc5ea5abd06deeaab52fcdba372f649728050c6143cc405ee0c166679"}, + {file = "shapely-2.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb5cdcbbe3080181498931b52a91a21a781a35dcb859da741c0345c6402bf00c"}, + {file = "shapely-2.0.4-cp38-cp38-win32.whl", 
hash = "sha256:55a38dcd1cee2f298d8c2ebc60fc7d39f3b4535684a1e9e2f39a80ae88b0cea7"}, + {file = "shapely-2.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:ec555c9d0db12d7fd777ba3f8b75044c73e576c720a851667432fabb7057da6c"}, + {file = "shapely-2.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9103abd1678cb1b5f7e8e1af565a652e036844166c91ec031eeb25c5ca8af0"}, + {file = "shapely-2.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:263bcf0c24d7a57c80991e64ab57cba7a3906e31d2e21b455f493d4aab534aaa"}, + {file = "shapely-2.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ddf4a9bfaac643e62702ed662afc36f6abed2a88a21270e891038f9a19bc08fc"}, + {file = "shapely-2.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:485246fcdb93336105c29a5cfbff8a226949db37b7473c89caa26c9bae52a242"}, + {file = "shapely-2.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8de4578e838a9409b5b134a18ee820730e507b2d21700c14b71a2b0757396acc"}, + {file = "shapely-2.0.4-cp39-cp39-win32.whl", hash = "sha256:9dab4c98acfb5fb85f5a20548b5c0abe9b163ad3525ee28822ffecb5c40e724c"}, + {file = "shapely-2.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:31c19a668b5a1eadab82ff070b5a260478ac6ddad3a5b62295095174a8d26398"}, + {file = "shapely-2.0.4.tar.gz", hash = "sha256:5dc736127fac70009b8d309a0eeb74f3e08979e530cf7017f2f507ef62e6cfb8"}, ] [package.dependencies] -numpy = ">=1.14,<2" +numpy = ">=1.14,<3" [package.extras] docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] @@ -1767,13 +1774,13 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "threadpoolctl" -version = "3.4.0" +version = "3.5.0" description = "threadpoolctl" optional = false python-versions = ">=3.8" files = [ - {file = "threadpoolctl-3.4.0-py3-none-any.whl", hash = "sha256:8f4c689a65b23e5ed825c8436a92b818aac005e0f3715f6a1664d7c7ee29d262"}, - {file = "threadpoolctl-3.4.0.tar.gz", hash = 
"sha256:f11b491a03661d6dd7ef692dd422ab34185d982466c49c8f98c8f716b5c93196"}, + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, ] [[package]] @@ -1829,28 +1836,28 @@ telegram = ["requests"] [[package]] name = "traitlets" -version = "5.14.2" +version = "5.14.3" description = "Traitlets Python configuration system" optional = false python-versions = ">=3.8" files = [ - {file = "traitlets-5.14.2-py3-none-any.whl", hash = "sha256:fcdf85684a772ddeba87db2f398ce00b40ff550d1528c03c14dbf6a02003cd80"}, - {file = "traitlets-5.14.2.tar.gz", hash = "sha256:8cdd83c040dab7d1dee822678e5f5d100b514f7b72b01615b26fc5718916fdf9"}, + {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, + {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, ] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.1)", "pytest-mock", "pytest-mypy-testing"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "typing-extensions" -version = "4.9.0" +version = "4.11.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = 
"sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] [[package]] @@ -1878,7 +1885,7 @@ click = "^8.1.3" pandas = "^2.0.0" polars = "^0.20.6" protobuf = "^4.22.3" -pyarrow = "^13.0.0" +pyarrow = "^15.0.0" typing-extensions = "^4.9.0" [package.extras] @@ -1889,7 +1896,7 @@ examples = ["ipykernel (>=6.22.0,<7.0.0)", "ipython (>=8.22.1,<9.0.0)", "nbforma type = "git" url = "https://github.com/alan-turing-institute/uatk-spc.git" reference = "55-output-formats-python" -resolved_reference = "ad1d8267e563348bd9634f5773c64bd98e76bd96" +resolved_reference = "c8ae0f336141cee4961541c51ba3f46bb02ae488" subdirectory = "python" [[package]] From 0da575a16aeb1011990337cd60ca57d4410dfd29 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Fri, 3 May 2024 09:12:47 +0100 Subject: [PATCH 14/16] Remove unused test module --- tests/test_dummy.py | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 tests/test_dummy.py diff --git a/tests/test_dummy.py b/tests/test_dummy.py deleted file mode 100644 index 91cae42..0000000 --- a/tests/test_dummy.py +++ /dev/null @@ -1,5 +0,0 @@ -from acbm.dummy import hello - - -def test_hello(): - assert hello() == 1 From f9e3cbbe4e110683b23a51a6bf20c699d8a78621 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Fri, 3 May 2024 09:41:54 +0100 Subject: [PATCH 15/16] Add placeholder tests for matching module --- tests/test_matching.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/test_matching.py diff --git a/tests/test_matching.py b/tests/test_matching.py new file mode 100644 index 0000000..c0e0da6 --- /dev/null +++ b/tests/test_matching.py @@ -0,0 +1,18 @@ +import pytest + +from acbm.matching import match_categorical, match_individuals, match_psm # noqa: F401 + + +@pytest.mark.skip(reason="todo") +def test_match_categorical(): + pass + + 
+@pytest.mark.skip(reason="todo") +def test_match_individuals(): + pass + + +@pytest.mark.skip(reason="todo") +def test_match_psm(): + pass From ebec8b736f7eb8275ccc6700ccd8463c3058ac04 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Fri, 3 May 2024 12:22:21 +0100 Subject: [PATCH 16/16] Add rng seed for scripts --- scripts/1_prep_synthpop.py | 6 ++++++ scripts/2.1_sandbox-match_households.py | 3 +++ scripts/2_match_households_and_individuals.py | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/scripts/1_prep_synthpop.py b/scripts/1_prep_synthpop.py index 34c8f51..ca4e313 100644 --- a/scripts/1_prep_synthpop.py +++ b/scripts/1_prep_synthpop.py @@ -1,7 +1,13 @@ +import numpy as np from uatk_spc.builder import Builder +SEED = 0 + def main(): + # Seed RNG + np.ranmdom.seed(SEED) + # Pick a region with SPC output saved path = "../data/external/spc_output/raw/" region = "west-yorkshire" diff --git a/scripts/2.1_sandbox-match_households.py b/scripts/2.1_sandbox-match_households.py index d6d8941..f11cf1a 100644 --- a/scripts/2.1_sandbox-match_households.py +++ b/scripts/2.1_sandbox-match_households.py @@ -15,6 +15,9 @@ pd.set_option("display.max_columns", None) +# Seed RNG +SEED = 0 +np.ranmdom.seed(SEED) # ## Step 1: Load in the datasets diff --git a/scripts/2_match_households_and_individuals.py b/scripts/2_match_households_and_individuals.py index 3ab577e..ddcea42 100644 --- a/scripts/2_match_households_and_individuals.py +++ b/scripts/2_match_households_and_individuals.py @@ -18,6 +18,10 @@ truncate_values, ) +# Seed RNG +SEED = 0 +np.ranmdom.seed(SEED) + pd.set_option("display.max_columns", None)