Skip to content
This repository has been archived by the owner on Nov 23, 2024. It is now read-only.

feat: house sales example #23

Merged
merged 2 commits into from
Mar 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions docs/examples/display_column_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pandas as pd
from IPython.core.display_functions import DisplayHandle
from IPython.display import display
from safeds.data.tabular import Table


def display_column_descriptions(column_descriptions: Table) -> DisplayHandle:
    """
    Display a Table containing the column descriptions.

    The pandas `max_colwidth` option is lifted while the table is rendered, so long descriptions are not cut off.
    The previous value of the option is restored afterwards, even if rendering raises an exception.

    Parameters
    ----------
    column_descriptions : Table
        The column descriptions. The table is expected to have two columns; they are relabeled to "Name" and
        "Description" for display.

    Returns
    -------
    DisplayHandle
        Whatever `IPython.display.display` returns for the styled table.
        NOTE(review): `display` is called without a `display_id`, in which case IPython presumably returns `None`
        rather than a handle - confirm against the IPython documentation before relying on the return value.
    """

    # option_context remembers the current value of max_colwidth and restores it on exit, including when the
    # styler or display() raises. A manual get/set pair would leak the changed global option in that case.
    with pd.option_context("max_colwidth", None):
        # Left-align the text and keep explicit line breaks inside the descriptions
        styler = (
            column_descriptions._data.style.relabel_index(["Name", "Description"], axis="columns")
            .hide(axis="index")
            .set_properties(
                **{
                    "text-align": "left",
                    "white-space": "pre-wrap",
                }
            )
        )

        # Rendering happens inside display(), so it must run while the option is still lifted
        return display(styler)
170 changes: 170 additions & 0 deletions docs/examples/house_sales.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# House Sales\n",
"\n",
"The dataset contains house sale prices for King County, USA between May 2014 and May 2015. It is well suited for practicing regression techniques."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Column descriptions"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds_examples.tabular import describe_house_sales_columns\n",
"from display_column_description import display_column_descriptions\n",
"\n",
"house_sales_description = describe_house_sales_columns()\n",
"display_column_descriptions(house_sales_description)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Sample"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds_examples.tabular import load_house_sales\n",
"\n",
"house_sales = load_house_sales()\n",
"house_sales.slice(end=10)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"is_executing": true
}
}
},
{
"cell_type": "markdown",
"source": [
"## Schema"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"house_sales.schema"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Statistics"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"house_sales.summary()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Correlation heatmap"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from safeds.plotting import correlation_heatmap\n",
"\n",
"house_sales_correlation = house_sales.drop_columns([\n",
" \"id\",\n",
" \"year\",\n",
" \"month\",\n",
" \"day\",\n",
" \"zipcode\",\n",
" \"latitude\",\n",
" \"longitude\"\n",
"])\n",
"correlation_heatmap(house_sales_correlation)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Attribution\n",
"\n",
"This dataset is a modified version of the [\"House Sales in King County, USA\" dataset](https://www.kaggle.com/datasets/harlfoxem/housesalesprediction) by Kaggle user [`harlfoxem`](https://www.kaggle.com/harlfoxem). The original dataset is licensed under `CC0: Public Domain`.\n",
"\n",
"Column descriptions are based on [this Kaggle discussion](https://www.kaggle.com/datasets/harlfoxem/housesalesprediction/discussion/207885).\n"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ nav:
- README.md
- Changelog: CHANGELOG.md
- Examples:
- House Sales: examples/house_sales.ipynb
- Titanic: examples/titanic.ipynb
- API Reference: reference/
- Development:
Expand Down Expand Up @@ -64,6 +65,7 @@ plugins:
include: ["*.ipynb"]
execute: true
allow_errors: false
no_input: true

watch:
- src
Expand Down
1 change: 1 addition & 0 deletions src/safeds_examples/tabular/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from ._house_sales import describe_house_sales_columns, load_house_sales
from ._titanic import load_titanic
1 change: 1 addition & 0 deletions src/safeds_examples/tabular/_house_sales/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._house_sales import describe_house_sales_columns, load_house_sales
69 changes: 69 additions & 0 deletions src/safeds_examples/tabular/_house_sales/_house_sales.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os

from safeds.data.tabular import Table

# Location of the bundled CSV file, resolved relative to this module so it does not depend on the working directory
_path = os.path.join(
    os.path.dirname(__file__),
    "data",
    "house_sales.csv",
)


def load_house_sales() -> Table:
    """
    Read the bundled "House Sales" CSV file into a `Table`.

    Returns
    -------
    Table
        The "House Sales" dataset.
    """

    house_sales = Table.from_csv(_path)
    return house_sales


def describe_house_sales_columns() -> Table:
    """
    Describe the columns of the "House Sales" dataset.

    Each row of the result pairs the name of a dataset column (`"Name"`) with an explanation of its contents
    (`"Description"`).

    Returns
    -------
    Table
        A `Table` with names and descriptions for all columns of the "House Sales" dataset.
    """

    # (column name, description) pairs, listed in the order the rows appear in the resulting table
    descriptions = (
        ("id", "A unique identifier"),
        ("year", "Year of sale"),
        ("month", "Month of sale"),
        ("day", "Day of sale"),
        ("zipcode", "Zipcode"),
        ("latitude", "Latitude"),
        ("longitude", "Longitude"),
        ("sqft_lot", "Lot area in square feet"),
        ("sqft_living", "Interior living space in square feet"),
        ("sqft_above", "Interior living space above ground in square feet"),
        ("sqft_basement", "Interior living space below ground in square feet"),
        ("floors", "Number of floors"),
        ("bedrooms", "Number of bedrooms"),
        (
            "bathrooms",
            "Number of bathrooms.\n\n"
            "Fractional values indicate that components (toilet/sink/shower/bathtub) are missing.",
        ),
        ("waterfront", "Whether the building overlooks a waterfront (0 = no, 1 = yes)"),
        ("view", "Rating of the view (1 to 5, higher is better)"),
        ("condition", "Rating of the condition of the house (1 to 5, higher is better)"),
        ("grade", "Rating of building construction and design (1 to 13, higher is better)"),
        ("year_built", "Year the house was built"),
        (
            "year_renovated",
            "Year the house was last renovated.\n\n"
            "A value of 0 indicates that it was never renovated.",
        ),
        ("sqft_lot_15nn", "Lot area of the 15 nearest neighbors in square feet"),
        ("sqft_living_15nn", "Interior living space of the 15 nearest neighbors in square feet"),
        ("price", "Price the house sold for in USD"),
    )

    rows = [{"Name": name, "Description": text} for name, text in descriptions]
    return Table(rows)
Loading