Skip to content
This repository has been archived by the owner on Nov 23, 2024. It is now read-only.

Commit

Permalink
feat: house sales example (#23)
Browse files Browse the repository at this point in the history
Closes #14.

### Summary of Changes

* Add a new dataset containing house sales
* Add a new function `load_house_sales` to load it
* Add a new function `describe_house_sales_columns` to get descriptions
for the columns of the dataset

---------

Co-authored-by: lars-reimann <[email protected]>
  • Loading branch information
lars-reimann and lars-reimann authored Mar 15, 2023
1 parent 54c568f commit be847cd
Show file tree
Hide file tree
Showing 13 changed files with 21,969 additions and 3 deletions.
44 changes: 44 additions & 0 deletions docs/examples/display_column_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pandas as pd
from IPython.core.display_functions import DisplayHandle
from IPython.display import display
from safeds.data.tabular import Table


def display_column_descriptions(column_descriptions: Table) -> DisplayHandle:
    """
    Display a Table containing column descriptions without cutting off the text.

    Parameters
    ----------
    column_descriptions : Table
        The column descriptions. The column headers are relabeled to "Name" and "Description" for display, so the
        table is expected to have exactly these two columns.

    Returns
    -------
    DisplayHandle
        The value returned by `IPython.display.display`.

    Notes
    -----
    NOTE(review): `display` is called without a `display_id`. According to the IPython documentation a handle is
    only returned when a `display_id` is given, so this presumably returns `None` - confirm before relying on the
    return value.
    """

    # Temporarily lift the column width limit so the descriptions are not cut off. The context manager restores the
    # previous value of the option on exit, even if building or displaying the styler raises.
    with pd.option_context("max_colwidth", None):
        # Left-aligned, index-free view that keeps the line breaks inside the descriptions
        styler = (
            column_descriptions._data.style.relabel_index(["Name", "Description"], axis="columns")
            .hide(axis="index")
            .set_properties(
                **{
                    "text-align": "left",
                    "white-space": "pre-wrap",
                }
            )
        )
        return display(styler)
170 changes: 170 additions & 0 deletions docs/examples/house_sales.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# House Sales\n",
"\n",
"The dataset contains house sale prices for King County, USA, between May 2014 and May 2015. It is well suited for practicing regression techniques."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Column descriptions"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds_examples.tabular import describe_house_sales_columns\n",
"from display_column_description import display_column_descriptions\n",
"\n",
"house_sales_description = describe_house_sales_columns()\n",
"display_column_descriptions(house_sales_description)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Sample"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds_examples.tabular import load_house_sales\n",
"\n",
"house_sales = load_house_sales()\n",
"house_sales.slice(end=10)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"is_executing": true
}
}
},
{
"cell_type": "markdown",
"source": [
"## Schema"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"house_sales.schema"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Statistics"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"house_sales.summary()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Correlation heatmap"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds.plotting import correlation_heatmap\n",
"\n",
"house_sales_correlation = house_sales.drop_columns([\n",
" \"id\",\n",
" \"year\",\n",
" \"month\",\n",
" \"day\",\n",
" \"zipcode\",\n",
" \"latitude\",\n",
" \"longitude\"\n",
"])\n",
"correlation_heatmap(house_sales_correlation)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Attribution\n",
"\n",
"This dataset is a modified version of the [\"House Sales in King County, USA\" dataset](https://www.kaggle.com/datasets/harlfoxem/housesalesprediction) by Kaggle user [`harlfoxem`](https://www.kaggle.com/harlfoxem). The original dataset is licensed under `CC0: Public Domain`.\n",
"\n",
"Column descriptions are based on [this Kaggle discussion](https://www.kaggle.com/datasets/harlfoxem/housesalesprediction/discussion/207885).\n"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ nav:
- README.md
- Changelog: CHANGELOG.md
- Examples:
- House Sales: examples/house_sales.ipynb
- Titanic: examples/titanic.ipynb
- API Reference: reference/
- Development:
Expand Down Expand Up @@ -64,6 +65,7 @@ plugins:
include: ["*.ipynb"]
execute: true
allow_errors: false
no_input: true

watch:
- src
Expand Down
1 change: 1 addition & 0 deletions src/safeds_examples/tabular/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from ._house_sales import describe_house_sales_columns, load_house_sales
from ._titanic import load_titanic
1 change: 1 addition & 0 deletions src/safeds_examples/tabular/_house_sales/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._house_sales import describe_house_sales_columns, load_house_sales
69 changes: 69 additions & 0 deletions src/safeds_examples/tabular/_house_sales/_house_sales.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os

from safeds.data.tabular import Table

# Location of the dataset file: "data/house_sales.csv" inside the directory that contains this module.
_path = os.path.join(os.path.dirname(__file__), "data", "house_sales.csv")


def load_house_sales() -> Table:
    """
    Read the "House Sales" dataset from the CSV file referenced by `_path`.

    Returns
    -------
    Table
        The "House Sales" dataset.
    """

    house_sales = Table.from_csv(_path)
    return house_sales


def describe_house_sales_columns() -> Table:
    """
    Describe the columns of the "House Sales" dataset.

    Returns
    -------
    Table
        A `Table` with the two columns `"Name"` and `"Description"`. Each row holds the name of one column of the
        "House Sales" dataset and the description of that column.
    """

    # (column name, description) pairs, in the order in which they appear in the resulting table
    descriptions = (
        ("id", "A unique identifier"),
        ("year", "Year of sale"),
        ("month", "Month of sale"),
        ("day", "Day of sale"),
        ("zipcode", "Zipcode"),
        ("latitude", "Latitude"),
        ("longitude", "Longitude"),
        ("sqft_lot", "Lot area in square feet"),
        ("sqft_living", "Interior living space in square feet"),
        ("sqft_above", "Interior living space above ground in square feet"),
        ("sqft_basement", "Interior living space below ground in square feet"),
        ("floors", "Number of floors"),
        ("bedrooms", "Number of bedrooms"),
        (
            "bathrooms",
            "Number of bathrooms.\n\n"
            "Fractional values indicate that components (toilet/sink/shower/bathtub) are missing.",
        ),
        ("waterfront", "Whether the building overlooks a waterfront (0 = no, 1 = yes)"),
        ("view", "Rating of the view (1 to 5, higher is better)"),
        ("condition", "Rating of the condition of the house (1 to 5, higher is better)"),
        ("grade", "Rating of building construction and design (1 to 13, higher is better)"),
        ("year_built", "Year the house was built"),
        (
            "year_renovated",
            "Year the house was last renovated.\n\n"
            "A value of 0 indicates that it was never renovated.",
        ),
        ("sqft_lot_15nn", "Lot area of the 15 nearest neighbors in square feet"),
        ("sqft_living_15nn", "Interior living space of the 15 nearest neighbors in square feet"),
        ("price", "Price the house sold for in USD"),
    )

    rows = [{"Name": name, "Description": description} for name, description in descriptions]
    return Table(rows)
Loading

0 comments on commit be847cd

Please sign in to comment.