diff --git a/pandas-reset-index/Main_Code.ipynb b/pandas-reset-index/Main_Code.ipynb new file mode 100644 index 0000000000..955a2a6c5d --- /dev/null +++ b/pandas-reset-index/Main_Code.ipynb @@ -0,0 +1,639 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b6863e61-0c9e-4bfa-9d07-29acccc53023", + "metadata": {}, + "source": [ + "**Pre-requisites**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de7f1d83-8c89-4c42-b6cd-2ed641c3f982", + "metadata": {}, + "outputs": [], + "source": [ + "!python -m pip install pandas pyarrow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b31a0d1d-f4fb-4ed1-bb36-3d83b091aec0", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67077b0b-17ff-4357-9a54-26b0a82d94d2", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb101f27-d344-4f1c-b093-9b08eab444ca", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beach_boys" + ] + }, + { + "cell_type": "markdown", + "id": "7c45de02-5ba2-4c7a-9068-92707508842a", + "metadata": {}, + "source": [ + "**How to Reset an Index in a pandas DataFrame With `.reset_index()`**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98823343-7f50-4bbf-a951-ea02b9329a70", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.sort_values(by=\"first_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fd5db9f-0180-404b-8dae-099a48203f53", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.sort_values(by=\"first_name\").reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4239013e-1bf8-41c7-9228-00a669e19a57", + "metadata": {}, + 
"outputs": [], + "source": [ + "beach_boys = beach_boys.sort_values(by=\"first_name\").reset_index(drop=True)\n", + "\n", + "beach_boys" + ] + }, + { + "cell_type": "markdown", + "id": "bf85bcf0-12dc-4c7d-a25e-5109bb057a76", + "metadata": {}, + "source": [ + "**Reset an Index Directly With `.index`**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b19f621-f2da-4bbd-80c0-942cb9244d76", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "initials = [\n", + " \"BW\",\n", + " \"ML\",\n", + " \"AJ\",\n", + " \"BJ\",\n", + " \"CW\",\n", + " \"DW\",\n", + " \"DM\",\n", + " \"RF\",\n", + " \"BC\",\n", + "]\n", + "\n", + "beach_boys.index = initials\n", + "beach_boys" + ] + }, + { + "cell_type": "markdown", + "id": "25a26ee2-b3bb-4988-9d59-d390ca5e73c9", + "metadata": {}, + "source": [ + "**Select Rows Using `.loc[]` and `.iloc[]`**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fab39a99-4c2f-40bf-aadf-b57cd937a1a2", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "initials = [\n", + " \"BW\",\n", + " \"ML\",\n", + " \"AJ\",\n", + " \"BJ\",\n", + " \"CW\",\n", + " \"DW\",\n", + " \"DM\",\n", + " \"RF\",\n", + " \"BC\",\n", + "]\n", + "\n", + "beach_boys.index = initials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3cdb97d-67b2-4655-bc01-5ab5506c83ed", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.loc[[\"BW\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eddc3706-51f8-425e-8c91-a389bb59ad7e", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.iloc[[1]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11f190ff-400d-42ee-b322-b4b23eb2dc15", + "metadata": {}, + "outputs": [], + 
"source": [ + "beach_boys.loc[\"BW\":\"BJ\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab7b2d99-5870-4b45-91d3-266609270c0e", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.iloc[1:4]" + ] + }, + { + "cell_type": "markdown", + "id": "d41359c2-87de-4cdd-809c-9dcf90d52b3d", + "metadata": {}, + "source": [ + "**Reset an Index Directly With `.set_axis()`**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fdee9b5-3dbd-4e74-9cb1-e426093b4f09", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "beach_boys.set_axis(range(len(beach_boys)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04358191-5878-4260-ac34-54264c43902d", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit -n 1000 beach_boys.reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37980a62-1ff1-4d38-956a-d1ca978f7407", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit -n 1000 beach_boys.index = pd.RangeIndex(len(beach_boys.index))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1cd8531-652a-4f60-83d3-d43dc70ba2ef", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit -n 1000 beach_boys.set_axis(range(len(beach_boys)))" + ] + }, + { + "cell_type": "markdown", + "id": "5619822e-7fb0-4c7b-ab5a-129f3243f8c5", + "metadata": {}, + "source": [ + "**Restore a Sequential Index**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e01df44-2cf8-42b2-a12f-f4d581312c39", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "beach_boys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cb0c30d-ca9c-4519-9154-408f65824820", + "metadata": {}, + "outputs": 
[], + "source": [ + "beach_boys.drop(labels=[3, 5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b85d26af-e073-44de-aaaf-c1e630b35229", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.drop(labels=[3, 5]).reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddde0dbc-f91f-4dc8-b180-54014b14b8de", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.drop(labels=[3, 5]).reset_index(drop=True)" + ] + }, + { + "cell_type": "markdown", + "id": "1a506c68-e74f-4f35-a837-322946d7a882", + "metadata": {}, + "source": [ + "**Remove Duplicate Index Values**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2736456-2e6b-4f1e-89b2-cd7400a0bfee", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "guitar_players = beach_boys.query(\"instrument == 'Guitar'\").reset_index(\n", + " drop=True\n", + ")\n", + "\n", + "guitar_players" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d18846e7-05ab-4713-b869-a6c95f59bcaa", + "metadata": {}, + "outputs": [], + "source": [ + "others = beach_boys.query(\"instrument != 'Guitar'\").reset_index(drop=True)\n", + "\n", + "others" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1701609-8a70-4670-b466-8db357ca3182", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys = pd.concat([guitar_players, others])\n", + "all_beach_boys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8deb773b-d40c-4db4-aa3a-dc09991415ae", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys.loc[3]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c2ea368-cf03-462c-865d-3f1bffebd3c8", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys.iloc[[3]]" + ] + }, + { + "cell_type": "markdown", + "id": 
"a822bb24-af5b-47d1-b51e-0d93a419e95f", + "metadata": {}, + "source": [ + "# This code will fail due to the duplicate index.\n", + "\n", + "all_beach_boys.loc[3:4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5a7d4ce-74eb-440d-b234-3adaf960dfc4", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys.iloc[3:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a379a7f3-6b29-43d8-8893-445e335068ea", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys.sort_index().loc[3:4]" + ] + }, + { + "cell_type": "markdown", + "id": "cf30ce38-8b34-426b-801d-4329a19b36e8", + "metadata": {}, + "source": [ + "# This code will fail due to the duplicate index.\n", + "\n", + "all_beach_boys.filter(items=[1, 3], axis=\"index\")" + ] + }, + { + "cell_type": "markdown", + "id": "eef40ce9-5814-4c53-bb7e-6c216eeed6d8", + "metadata": {}, + "source": [ + "**Use an Existing Column as an Index**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d89d0233-6caa-4ee4-bae5-80cc85b8238b", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "beach_boys.set_index(\"first_name\").loc[[\"Brian\", \"Carl\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11abd710-8384-40a9-b965-e0a4ac863765", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.index = [f\"Employee_{x}\" for x in range(len(beach_boys))]\n", + "beach_boys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fbba98c-7719-4c85-8700-52fdcf7620e2", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.loc[[\"Employee_3\"]]" + ] + }, + { + "cell_type": "markdown", + "id": "590ac61d-0c8d-48bd-a5bd-4ebd458f6899", + "metadata": {}, + "source": [ + "**Align Indexes of Several DataFrames**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"de1d3982-e319-4832-8933-4ddc68416679", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales = pd.read_csv(\"week1_record_sales.csv\").set_index(\"index\")\n", + "\n", + "week1_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddd30d0e-722e-4af5-bfa5-776b52ca3924", + "metadata": {}, + "outputs": [], + "source": [ + "week2_sales = pd.read_csv(\"week2_record_sales.csv\").set_index(\"index\")\n", + "\n", + "week2_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76cb534c-b6c6-4293-8e85-5d5615f8eff2", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51195c17-b2cd-4a4c-a58b-c91341db7e32", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales.merge(week2_sales, left_index=True, right_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "966792a2-b608-4708-b43c-fa6c81cc2853", + "metadata": {}, + "outputs": [], + "source": [ + "week2_sales = week2_sales.reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74eebf7b-e7d1-40fa-a916-5a66ede9ba43", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ce9e3f8-e39b-498f-b807-a4ac36808aa7", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales.merge(week2_sales, left_index=True, right_index=True)" + ] + }, + { + "cell_type": "markdown", + "id": "6b0afa7c-7c21-4786-96dc-d073d70632eb", + "metadata": {}, + "source": [ + "**Resetting Multi-Indexes**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84875451-a540-4b99-9e64-7a89979259ed", + "metadata": {}, + "outputs": [], + "source": [ + "cereals = pd.read_csv(\"cereals.csv\").convert_dtypes(dtype_backend=\"pyarrow\")\n", + 
"cereals.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf46a23d-a713-442a-8d1f-ff0d7ade6ad9", + "metadata": {}, + "outputs": [], + "source": [ + "cereals.pivot_table(\n", + " values=\"fiber\",\n", + " index=[\"manufacturer\", \"type\"],\n", + " aggfunc=\"mean\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b800c41b-a419-4ca9-9f44-eb2e142994b5", + "metadata": {}, + "outputs": [], + "source": [ + "cereals.pivot_table(\n", + " values=\"fiber\",\n", + " index=[\"manufacturer\", \"type\"],\n", + " aggfunc=\"mean\",\n", + ").index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "809b06cb-20cd-43c7-acfd-54dcff8a72b7", + "metadata": {}, + "outputs": [], + "source": [ + "cereals.pivot_table(\n", + " values=\"fiber\", index=[\"manufacturer\", \"type\"], aggfunc=\"mean\"\n", + ").reset_index(level=1, drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54fe7d99-1cd6-4a14-8b42-af794cd52766", + "metadata": {}, + "outputs": [], + "source": [ + "cereals.pivot_table(\n", + " values=\"fiber\",\n", + " index=[\"manufacturer\", \"type\"],\n", + " aggfunc=\"mean\",\n", + ").reset_index()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pandas-reset-index/README.md b/pandas-reset-index/README.md new file mode 100644 index 0000000000..e568286d6e --- /dev/null +++ b/pandas-reset-index/README.md @@ -0,0 +1,7 @@ +# How to Reset a pandas DataFrame Index + +These are the download files you can use with the Real Python tutorial [How to Reset a pandas DataFrame 
Index](https://realpython.com/how-to-pandas-reset-index-dataframe/): + +- `Main_Code.ipynb` contains the main code from the tutorial. +- `Solutions.ipynb` contains sample solutions to the various exercises. +- `band_members.csv`, `cereals.csv`, `week1_record_sales.csv`, and `week2_record_sales.csv` each contain source data used in the tutorial. diff --git a/pandas-reset-index/Solutions.ipynb b/pandas-reset-index/Solutions.ipynb new file mode 100644 index 0000000000..f0663606ab --- /dev/null +++ b/pandas-reset-index/Solutions.ipynb @@ -0,0 +1,519 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e6c42d98-1acf-4d7c-bd10-5cf0aa9d502f", + "metadata": {}, + "source": [ + "**Pre-requisites**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70f3a97f-053b-4b99-be86-b2543f817130", + "metadata": {}, + "outputs": [], + "source": [ + "!python -m pip install pandas pyarrow" + ] + }, + { + "cell_type": "markdown", + "id": "76b58c64-3bff-4aae-be3f-7df32be2bca3", + "metadata": {}, + "source": [ + "**Possible Solution - Retaining An Old Index**" + ] + }, + { + "cell_type": "markdown", + "id": "7aabcf40-8016-4eb0-990b-7880a818cc2d", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "**Note:** This code parses the dates as opposed to leaving them as strings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "834761d7-aaee-4545-b2a8-94efae4c82ab", + "metadata": {}, + "outputs": [], + "source": [ + "# This version will parse the \"date_of_birth\" column.\n", + "# These dates will be formatted differently to the tutorial.\n", + "\n", + "import pandas as pd\n", + "\n", + "beach_boys = pd.read_csv(\n", + " \"band_members.csv\",\n", + " parse_dates=[\"date_of_birth\"],\n", + " dayfirst=True,\n", + ").convert_dtypes(dtype_backend=\"pyarrow\")\n", + "\n", + "beach_boys = beach_boys.assign(\n", + " date_of_birth=beach_boys[\"date_of_birth\"].dt.date\n", + ")\n", + "beach_boys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00f37c3d-33a5-4b2a-98ed-9431ef125f89", + "metadata": {}, + "outputs": [], + "source": [ + "# Tutorial version of the code.\n", + "\n", + "import pandas as pd\n", + "\n", + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "beach_boys.index = range(1, 10)\n", + "beach_boys = beach_boys.reset_index(names=\"old_index\")\n", + "beach_boys" + ] + }, + { + "cell_type": "markdown", + "id": "d6bcea77-6b70-4086-981b-1333a1ddd3b1", + "metadata": {}, + "source": [ + "**Possible Solution - Using `.index` and row selection**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e1c40ea-0e4b-4b1d-99af-1d68a82ef32b", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "beach_boys.index = [x for x in range(1, 20) if x % 2 == 0]\n", + "\n", + "beach_boys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3909e7e6-224c-4c28-a915-892a0cb014a9", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.loc[16:18]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b961441a-cc26-4d91-8edc-232f8c4e81bf", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys.iloc[-2:]" + ] + }, + { + "cell_type": "markdown", + "id": "7c2c333f-aabb-429d-a16c-56b380a3a211", + "metadata": {}, + "source": [ + "**Possible Solution - Using `.set_axis()`**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12555390-e34b-4b07-ba0d-661452dd52be", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = beach_boys.set_axis(labels=[x**2 for x in range(0, 9)])\n", + "beach_boys" + ] + }, + { + "cell_type": "markdown", + "id": "8d8c73db-4c2e-4f49-9f0e-bc038cf6f716", + "metadata": {}, + "source": [ + "**Possible Solutions - Index Restoration**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54a4d206-9f0d-41b2-8604-7be79652c32d", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "beach_boys = pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "beach_boys = beach_boys.drop(labels=[3, 5])\n", + "beach_boys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "165e61e6-160f-4a94-9c07-ef4449627c47", + "metadata": {}, + "outputs": [], + "source": [ + "# (i) Using .reset_index()\n", + "beach_boys = beach_boys.reset_index(drop=True)\n", + "beach_boys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "287eb5f2-3f58-43fc-beef-42ff89c390df", + "metadata": {}, + "outputs": [], + "source": [ + "# (ii) Using .index\n", + "beach_boys.index = [x for x in range(len(beach_boys))]\n", + "beach_boys" + ] + }, + { + "cell_type": "markdown", + "id": "351344f2-2db6-4054-9751-63436bc6ee67", + "metadata": {}, + "source": [ + "**Possible Solution - Dealing With Duplicates**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0195d6d3-01ed-4f3e-b374-646be970d1ff", + "metadata": {}, + "outputs": [], + "source": [ + "beach_boys = 
pd.read_csv(\"band_members.csv\").convert_dtypes(\n", + " dtype_backend=\"pyarrow\"\n", + ")\n", + "\n", + "guitar_players = beach_boys.query(\"instrument == 'Guitar'\").reset_index(\n", + " drop=True\n", + ")\n", + "\n", + "others = beach_boys.query(\"instrument != 'Guitar'\").reset_index(drop=True)\n", + "\n", + "all_beach_boys = pd.concat([guitar_players, others]).reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "443301b0-b2b2-4e98-ac30-87cd5247b497", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys.loc[[3]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6018960f-963a-4fec-9e92-6e96197e5b7d", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys.iloc[[3]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0f664b0-5903-490e-9714-a4c4dda2681a", + "metadata": {}, + "outputs": [], + "source": [ + "all_beach_boys.filter(items=[1, 3], axis=\"index\")" + ] + }, + { + "cell_type": "markdown", + "id": "63c18951-baa5-4244-b053-e104c7ddbeaa", + "metadata": {}, + "source": [ + "**Possible Solution - Customising Existing Columns**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "969da9c7-30ca-4ae6-8d7a-925f19369984", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "beach_boys = pd.read_csv(\n", + " \"band_members.csv\",\n", + ").convert_dtypes(dtype_backend=\"pyarrow\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f62038bd-fa3e-4c51-bf62-09d32f30c719", + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_user_ID(row):\n", + " return f\"{row['last_name']}{row['first_name'][0]}\"\n", + "\n", + "\n", + "beach_boys.index = beach_boys.apply(calculate_user_ID, axis=1)\n", + "\n", + "beach_boys" + ] + }, + { + "cell_type": "markdown", + "id": "b480ab24-5dfe-4892-aa4d-ea07b74e9bd5", + "metadata": {}, + "source": [ + "**Possible Solution - Index Alignment 
(1)**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d37b244-13a7-42f3-9ad1-462ff59f23e7", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales = pd.read_csv(\"week1_record_sales.csv\").set_index(\"index\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecd91233-8bdf-45fa-87a9-700f6056b750", + "metadata": {}, + "outputs": [], + "source": [ + "week2_sales = pd.read_csv(\"week2_record_sales.csv\").set_index(\"index\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db6bec66-fea7-41b3-a874-30c30b15d54c", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b76f6afe-0c08-4653-a40f-d82194cc2dc5", + "metadata": {}, + "outputs": [], + "source": [ + "week2_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fba8c2c-d90d-4158-9712-cfe0fde11140", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales.index = week2_sales.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16341960-0d3b-4aea-b650-a20c58673c2e", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e378f632-93c7-45fc-bb6b-3fa0ef54cf4d", + "metadata": {}, + "outputs": [], + "source": [ + "week2_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1c9a420-3e1c-4eb9-ae87-eaba0079bc24", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2dfdfbe-5d03-4a60-a86f-5cdb24ac22e1", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales.merge(week2_sales, left_index=True, right_index=True)" + ] + }, + { + "cell_type": "markdown", + "id": "eda12215-7c8e-44e2-a110-67934f06a2d4", + "metadata": {}, + "source": [ + "**Possible Solution - Index 
Alignment (2)**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c5b10d4-02e1-493a-8a4c-e87989998f3c", + "metadata": {}, + "outputs": [], + "source": [ + "week1_sales = week1_sales.set_index(\"day\")\n", + "week2_sales = week2_sales.set_index(\"day\")\n", + "\n", + "week1_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ba444f6-5581-4ad5-8bca-6e647563c4b7", + "metadata": {}, + "outputs": [], + "source": [ + "week2_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c792ee8-eb75-4be4-afff-f4f844c55129", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "week1_sales.loc[:, \"sales\"] + week2_sales.loc[:, \"sales\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9092f593-8067-4b8b-8582-6702a09699d2", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "week1_sales.merge(week2_sales, left_index=True, right_index=True)" + ] + }, + { + "cell_type": "markdown", + "id": "b4b6511d-ef2f-4e64-9359-325f6228e3a6", + "metadata": {}, + "source": [ + "**Possible Solution - Dealing With A `MultiIndex`**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45c281f1-9c97-447d-894e-bd33196dbad7", + "metadata": {}, + "outputs": [], + "source": [ + "cereals = pd.read_csv(\"cereals.csv\").convert_dtypes(dtype_backend=\"pyarrow\")\n", + "\n", + "cereals = cereals.pivot_table(\n", + " values=\"fiber\",\n", + " index=[\"type\", \"manufacturer\"],\n", + " aggfunc=\"mean\",\n", + ").reset_index(level=1)\n", + "\n", + "cereals" + ] + }, + { + "cell_type": "markdown", + "id": "52bd7cce-e4c5-4b11-b0b7-4e7b7f1d5cdc", + "metadata": {}, + "source": [ + "**Possible Solution - Creating A Meaningful Index**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0af37ac-c7d7-4c92-a22a-da74f645d622", + "metadata": {}, + "outputs": [], + "source": [ + "cereals = 
pd.read_csv(\"cereals.csv\").convert_dtypes(dtype_backend=\"pyarrow\")\n", + "\n", + "cereals = cereals.pivot_table(\n", + " values=\"fiber\", index=[\"manufacturer\", \"type\"], aggfunc=\"mean\"\n", + ")\n", + "\n", + "cereals.index = cereals.index.to_flat_index()\n", + "cereals" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pandas-reset-index/band_members.csv b/pandas-reset-index/band_members.csv new file mode 100644 index 0000000000..e34876f5de --- /dev/null +++ b/pandas-reset-index/band_members.csv @@ -0,0 +1,10 @@ +first_name,last_name,instrument,date_of_birth +Brian,Wilson,Bass,20-Jun-1942 +Mike,Love,Saxophone,15-Mar-1941 +Al,Jardine,Guitar,03-Sep-1942 +Bruce,Johnston,Bass,27-Jun-1942 +Carl,Wilson,Guitar,21-Dec-1946 +Dennis,Wilson,Drums,04-Dec-1944 +David,Marks,Guitar,22-Aug-1948 +Ricky,Fataar,Drums,05-Sep-1952 +Blondie,Chaplin,Guitar,07-Jul-1951 diff --git a/pandas-reset-index/cereals.csv b/pandas-reset-index/cereals.csv new file mode 100644 index 0000000000..e30a1e1345 --- /dev/null +++ b/pandas-reset-index/cereals.csv @@ -0,0 +1,78 @@ +name,manufacturer,type,fiber +100% Bran,Nabisco,Cold,10 +100% Natural Bran,Quaker Oats,Cold,2 +All-Bran,Kelloggs,Cold,9 +All-Bran with Extra Fiber,Kelloggs,Cold,14 +Almond Delight,Ralston Purina,Cold,1 +Apple Cinnamon Cheerios,General Mills,Cold,1.5 +Apple Jacks,Kelloggs,Cold,1 +Basic 4,General Mills,Cold,2 +Bran Chex,Ralston Purina,Cold,4 +Bran Flakes,Post,Cold,5 +Cap'n'Crunch,Quaker Oats,Cold,0 +Cheerios,General Mills,Cold,2 +Cinnamon Toast Crunch,General Mills,Cold,0 +Clusters,General Mills,Cold,2 +Cocoa Puffs,General 
Mills,Cold,0 +Corn Chex,Ralston Purina,Cold,0 +Corn Flakes,Kelloggs,Cold,1 +Corn Pops,Kelloggs,Cold,1 +Count Chocula,General Mills,Cold,0 +Cracklin' Oat Bran,Kelloggs,Cold,4 +Cream of Wheat (Quick),Nabisco,Hot,1 +Crispix,Kelloggs,Cold,1 +Crispy Wheat & Raisins,General Mills,Cold,2 +Double Chex,Ralston Purina,Cold,1 +Froot Loops,Kelloggs,Cold,1 +Frosted Flakes,Kelloggs,Cold,1 +Frosted Mini-Wheats,Kelloggs,Cold,3 +Fruit & Fibre Dates; Walnuts; and Oats,Post,Cold,5 +Fruitful Bran,Kelloggs,Cold,5 +Fruity Pebbles,Post,Cold,0 +Golden Crisp,Post,Cold,0 +Golden Grahams,General Mills,Cold,0 +Grape Nuts Flakes,Post,Cold,3 +Grape-Nuts,Post,Cold,3 +Great Grains Pecan,Post,Cold,3 +Honey Graham Ohs,Quaker Oats,Cold,1 +Honey Nut Cheerios,General Mills,Cold,1.5 +Honey-comb,Post,Cold,0 +Just Right Crunchy Nuggets,Kelloggs,Cold,1 +Just Right Fruit & Nut,Kelloggs,Cold,2 +Kix,General Mills,Cold,0 +Life,Quaker Oats,Cold,2 +Lucky Charms,General Mills,Cold,0 +Maypo,American Home Food Products,Hot,0 +Muesli Raisins; Dates; & Almonds,Ralston Purina,Cold,3 +Muesli Raisins; Peaches; & Pecans,Ralston Purina,Cold,3 +Mueslix Crispy Blend,Kelloggs,Cold,3 +Multi-Grain Cheerios,General Mills,Cold,2 +Nut&Honey Crunch,Kelloggs,Cold,0 +Nutri-Grain Almond-Raisin,Kelloggs,Cold,3 +Nutri-grain Wheat,Kelloggs,Cold,3 +Oatmeal Raisin Crisp,General Mills,Cold,1.5 +Post Nat. 
Raisin Bran,Post,Cold,6 +Product 19,Kelloggs,Cold,1 +Puffed Rice,Quaker Oats,Cold,0 +Puffed Wheat,Quaker Oats,Cold,1 +Quaker Oat Squares,Quaker Oats,Cold,2 +Quaker Oatmeal,Quaker Oats,Hot,2.7 +Raisin Bran,Kelloggs,Cold,5 +Raisin Nut Bran,General Mills,Cold,2.5 +Raisin Squares,Kelloggs,Cold,2 +Rice Chex,Ralston Purina,Cold,0 +Rice Krispies,Kelloggs,Cold,0 +Shredded Wheat,Nabisco,Cold,3 +Shredded Wheat 'n'Bran,Nabisco,Cold,4 +Shredded Wheat spoon size,Nabisco,Cold,3 +Smacks,Kelloggs,Cold,1 +Special K,Kelloggs,Cold,1 +Strawberry Fruit Wheats,Nabisco,Cold,3 +Total Corn Flakes,General Mills,Cold,0 +Total Raisin Bran,General Mills,Cold,4 +Total Whole Grain,General Mills,Cold,3 +Triples,General Mills,Cold,0 +Trix,General Mills,Cold,0 +Wheat Chex,Ralston Purina,Cold,3 +Wheaties,General Mills,Cold,3 +Wheaties Honey Gold,General Mills,Cold,1 diff --git a/pandas-reset-index/week1_record_sales.csv b/pandas-reset-index/week1_record_sales.csv new file mode 100644 index 0000000000..b99bbdbcc5 --- /dev/null +++ b/pandas-reset-index/week1_record_sales.csv @@ -0,0 +1,6 @@ +index,day,sales +0,Mon,100 +1,Tue,150 +2,Wed,200 +3,Thu,250 +4,Fri,300 diff --git a/pandas-reset-index/week2_record_sales.csv b/pandas-reset-index/week2_record_sales.csv new file mode 100644 index 0000000000..e83772ee8e --- /dev/null +++ b/pandas-reset-index/week2_record_sales.csv @@ -0,0 +1,6 @@ +index,day,sales +1,Mon,100 +2,Tue,150 +3,Wed,200 +4,Thu,250 +5,Fri,300