This repository has been archived by the owner on Nov 23, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Closes #14. ### Summary of Changes * Add a new dataset containing house sales * Add a new function `load_house_sales` to load it * Add a new function `describe_house_sales_columns` to get descriptions for the columns of the dataset --------- Co-authored-by: lars-reimann <[email protected]>
- Loading branch information
1 parent
54c568f
commit be847cd
Showing
13 changed files
with
21,969 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import pandas as pd | ||
from IPython.core.display_functions import DisplayHandle | ||
from IPython.display import display | ||
from safeds.data.tabular import Table | ||
|
||
|
||
def display_column_descriptions(column_descriptions: Table) -> "DisplayHandle | None":
    """
    Display a Table containing column descriptions as a left-aligned table without an index.

    The pandas `display.max_colwidth` option is lifted only for the duration of the call, so
    long descriptions are not cut off. The previous value is restored afterwards, even if
    building or displaying the table raises.

    Parameters
    ----------
    column_descriptions : Table
        The column descriptions. Its columns are relabeled to "Name" and "Description", so it
        is expected to have exactly two columns.

    Returns
    -------
    DisplayHandle | None
        The value returned by `IPython.display.display`. No display ID is requested here, so
        per the IPython documentation this is `None`.
    """

    # option_context restores the previous max_colwidth on exit, including on exceptions,
    # so a failure here can no longer leak the changed setting into the rest of the session.
    with pd.option_context("display.max_colwidth", None):
        # NOTE(review): this reads the private `_data` attribute of Table; presumably it is the
        # backing pandas DataFrame - confirm against the safeds version in use.
        styler = (
            column_descriptions._data.style.relabel_index(["Name", "Description"], axis="columns")
            .hide(axis="index")
            .set_properties(
                **{
                    "text-align": "left",
                    # pre-wrap keeps the explicit line breaks inside the descriptions
                    "white-space": "pre-wrap",
                }
            )
        )
        return display(styler)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# House Sales\n", | ||
"\n", | ||
"The dataset contains house sale prices for King County, USA, between May 2014 and May 2015. It is well suited for practicing regression techniques." | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Column descriptions" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"outputs": [], | ||
"source": [ | ||
"from safeds_examples.tabular import describe_house_sales_columns\n", | ||
"from display_column_description import display_column_descriptions\n", | ||
"\n", | ||
"house_sales_description = describe_house_sales_columns()\n", | ||
"display_column_descriptions(house_sales_description)" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Sample" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"outputs": [], | ||
"source": [ | ||
"from safeds_examples.tabular import load_house_sales\n", | ||
"\n", | ||
"house_sales = load_house_sales()\n", | ||
"house_sales.slice(end=10)" | ||
], | ||
"metadata": { | ||
"collapsed": false, | ||
"pycharm": { | ||
"is_executing": true | ||
} | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Schema" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"outputs": [], | ||
"source": [ | ||
"house_sales.schema" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Statistics" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"outputs": [], | ||
"source": [ | ||
"house_sales.summary()" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Correlation heatmap" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"outputs": [], | ||
"source": [ | ||
"from safeds.plotting import correlation_heatmap\n", | ||
"\n", | ||
"house_sales_correlation = house_sales.drop_columns([\n", | ||
" \"id\",\n", | ||
" \"year\",\n", | ||
" \"month\",\n", | ||
" \"day\",\n", | ||
" \"zipcode\",\n", | ||
" \"latitude\",\n", | ||
" \"longitude\"\n", | ||
"])\n", | ||
"correlation_heatmap(house_sales_correlation)" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Attribution\n", | ||
"\n", | ||
"This dataset is a modified version of the [\"House Sales in King County, USA\" dataset](https://www.kaggle.com/datasets/harlfoxem/housesalesprediction) by Kaggle user [`harlfoxem`](https://www.kaggle.com/harlfoxem). The original dataset is licensed under `CC0: Public Domain`.\n", | ||
"\n", | ||
"Column descriptions are based on [this Kaggle discussion](https://www.kaggle.com/datasets/harlfoxem/housesalesprediction/discussion/207885).\n" | ||
], | ||
"metadata": { | ||
"collapsed": false | ||
} | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from ._house_sales import describe_house_sales_columns, load_house_sales | ||
from ._titanic import load_titanic |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from ._house_sales import describe_house_sales_columns, load_house_sales |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import os | ||
|
||
from safeds.data.tabular import Table | ||
|
||
# Location of the bundled CSV file, resolved relative to this module's directory.
_data_dir = os.path.join(os.path.dirname(__file__), "data")
_path = os.path.join(_data_dir, "house_sales.csv")
|
||
|
||
def load_house_sales() -> Table:
    """
    Load the "House Sales" dataset from the CSV file referenced by the module-level `_path`.

    Returns
    -------
    Table
        The "House Sales" dataset.
    """

    house_sales = Table.from_csv(_path)
    return house_sales
|
||
|
||
def describe_house_sales_columns() -> Table:
    """
    Describe the columns of the "House Sales" dataset.

    Each row of the result pairs the name of one dataset column (`"Name"`) with a
    human-readable explanation of it (`"Description"`).

    Returns
    -------
    Table
        A `Table` with names and descriptions for all columns of the "House Sales" dataset.
    """

    # (column name, description) pairs; the order here is the row order of the result.
    descriptions = (
        ("id", "A unique identifier"),
        ("year", "Year of sale"),
        ("month", "Month of sale"),
        ("day", "Day of sale"),
        ("zipcode", "Zipcode"),
        ("latitude", "Latitude"),
        ("longitude", "Longitude"),
        ("sqft_lot", "Lot area in square feet"),
        ("sqft_living", "Interior living space in square feet"),
        ("sqft_above", "Interior living space above ground in square feet"),
        ("sqft_basement", "Interior living space below ground in square feet"),
        ("floors", "Number of floors"),
        ("bedrooms", "Number of bedrooms"),
        (
            "bathrooms",
            "Number of bathrooms.\n\n"
            "Fractional values indicate that components (toilet/sink/shower/bathtub) are missing.",
        ),
        ("waterfront", "Whether the building overlooks a waterfront (0 = no, 1 = yes)"),
        ("view", "Rating of the view (1 to 5, higher is better)"),
        ("condition", "Rating of the condition of the house (1 to 5, higher is better)"),
        ("grade", "Rating of building construction and design (1 to 13, higher is better)"),
        ("year_built", "Year the house was built"),
        (
            "year_renovated",
            "Year the house was last renovated.\n\n"
            "A value of 0 indicates that it was never renovated.",
        ),
        ("sqft_lot_15nn", "Lot area of the 15 nearest neighbors in square feet"),
        ("sqft_living_15nn", "Interior living space of the 15 nearest neighbors in square feet"),
        ("price", "Price the house sold for in USD"),
    )

    rows = [{"Name": name, "Description": text} for name, text in descriptions]
    return Table(rows)
Oops, something went wrong.