Skip to content

Commit

Permalink
add jupytext and new nb for presentation
Browse files Browse the repository at this point in the history
  • Loading branch information
pmayd committed Jan 29, 2024
1 parent 3d8c347 commit 6e060f7
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 274 deletions.
122 changes: 122 additions & 0 deletions nb/presentation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "571e9d1b",
"metadata": {},
"outputs": [],
"source": [
"import pystatis"
]
},
{
"cell_type": "markdown",
"id": "d580984f",
"metadata": {},
"source": [
"# Pystatis presentation\n",
"\n",
"`pystatis` is a small Python library to conveniently wrap the different GENESIS web services (APIs) in a centralized and user-friendly manner.\n",
"\n",
"It allows users to browse the different databases and download the desired tables from all supported databases in a convenient `pandas` `DataFrame` object, suited for further analysis."
]
},
{
"cell_type": "markdown",
"id": "62b9f397",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"We won't cover the initial only-once setup here because the user has to enter their credentials for the supported databases (GENESIS, Regionalstatistik, Zensus). But there is a dedicated notebook [Setup](./00_Setup.ipynb) with examples and explanations."
]
},
{
"cell_type": "markdown",
"id": "cbe657a8",
"metadata": {},
"source": [
"## Main Use Cases"
]
},
{
"cell_type": "markdown",
"id": "90350387",
"metadata": {},
"source": [
"### Find"
]
},
{
"cell_type": "markdown",
"id": "354d61d3",
"metadata": {},
"source": [
"### Table"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fe99d7cd",
"metadata": {},
"outputs": [],
"source": [
"t = pystatis.Table(name=\"12111-01-01-5-B\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "69b87edb",
"metadata": {},
"outputs": [
{
"ename": "PystatisConfigError",
"evalue": "No active database set! Please run `set_db()`.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mPystatisConfigError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/table.py:37\u001b[0m, in \u001b[0;36mTable.get_data\u001b[0;34m(self, area, **kwargs)\u001b[0m\n\u001b[1;32m 33\u001b[0m params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marea\u001b[39m\u001b[38;5;124m\"\u001b[39m: area, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformat\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mffcsv\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 35\u001b[0m params \u001b[38;5;241m|\u001b[39m\u001b[38;5;241m=\u001b[39m kwargs\n\u001b[0;32m---> 37\u001b[0m raw_data \u001b[38;5;241m=\u001b[39m \u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdata\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtablefile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mas_json\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 39\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(raw_data, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;66;03m# nosec assert_used\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw_data \u001b[38;5;241m=\u001b[39m raw_data\n",
"File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/http_helper.py:51\u001b[0m, in \u001b[0;36mload_data\u001b[0;34m(endpoint, method, params, as_json)\u001b[0m\n\u001b[1;32m 49\u001b[0m data \u001b[38;5;241m=\u001b[39m read_from_cache(cache_dir, name, params)\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 51\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mget_data_from_endpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 52\u001b[0m data \u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mtext\n\u001b[1;32m 54\u001b[0m \u001b[38;5;66;03m# status code 98 means that the table is too big\u001b[39;00m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# we have to start a job and wait for it to be ready\u001b[39;00m\n",
"File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/http_helper.py:95\u001b[0m, in \u001b[0;36mget_data_from_endpoint\u001b[0;34m(endpoint, method, params)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_data_from_endpoint\u001b[39m(\n\u001b[1;32m 81\u001b[0m endpoint: \u001b[38;5;28mstr\u001b[39m, method: \u001b[38;5;28mstr\u001b[39m, params: \u001b[38;5;28mdict\u001b[39m\n\u001b[1;32m 82\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m requests\u001b[38;5;241m.\u001b[39mResponse:\n\u001b[1;32m 83\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;124;03m Wrapper method which constructs an url for querying data from Destatis and\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124;03m sends a GET request.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;124;03m requests.Response: the response object holding the response from calling the Destatis endpoint.\u001b[39;00m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 95\u001b[0m db_host, db_user, db_pw \u001b[38;5;241m=\u001b[39m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_db_settings\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 96\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdb_host\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;66;03m# params is used to calculate hash for caching so don't alter params dict here!\u001b[39;00m\n",
"File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/db.py:61\u001b[0m, in \u001b[0;36mget_db_settings\u001b[0;34m()\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_db_settings\u001b[39m() \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mtuple\u001b[39m[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m]:\n\u001b[1;32m 60\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Get the active database settings (host, user, password).\"\"\"\u001b[39;00m\n\u001b[0;32m---> 61\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mget_db_host\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m, get_db_user(), get_db_pw()\n",
"File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/db.py:43\u001b[0m, in \u001b[0;36mget_db_host\u001b[0;34m()\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_db_host\u001b[39m() \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config\u001b[38;5;241m.\u001b[39mconfig[\u001b[43mget_db\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbase_url\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[0;32m~/git/github/CorrelAid/pystatis/src/pystatis/db.py:35\u001b[0m, in \u001b[0;36mget_db\u001b[0;34m()\u001b[0m\n\u001b[1;32m 32\u001b[0m active_db \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msettings\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mactive_db\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m active_db:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PystatisConfigError(\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo active database set! Please run `set_db()`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m active_db\n",
"\u001b[0;31mPystatisConfigError\u001b[0m: No active database set! Please run `set_db()`."
]
}
],
"source": [
"t.get_data()"
]
}
],
"metadata": {
"jupytext": {
"formats": "ipynb,py:percent"
},
"kernelspec": {
"display_name": "pystatis",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
44 changes: 44 additions & 0 deletions nb/presentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.1
# kernelspec:
# display_name: pystatis
# language: python
# name: python3
# ---

# %%
import pystatis

# %% [markdown]
# # Pystatis presentation
#
# `pystatis` is a small Python library to conveniently wrap the different GENESIS web services (APIs) in a centralized and user-friendly manner.
#
# It lets users browse all supported databases and download the desired tables as convenient `pandas` `DataFrame` objects, ready for further analysis.

# %% [markdown]
# ## Setup
#
# We won't cover the initial one-time setup here because it requires users to enter their credentials for the supported databases (GENESIS, Regionalstatistik, Zensus). See the dedicated [Setup](./00_Setup.ipynb) notebook for examples and explanations.

# %% [markdown]
# ## Main Use Cases

# %% [markdown]
# ### Find

# %% [markdown]
# ### Table

# %%
# `Table.get_data()` raises `PystatisConfigError` ("No active database set!
# Please run `set_db()`.") on a fresh kernel, so activate a database before
# requesting any data.
# NOTE(review): `pystatis.db.set_db` and the identifier "regio"
# (Regionalstatistik, which is assumed to host table 12111-01-01-5-B) are
# inferred from that error message -- confirm both against the pystatis API.
# Credentials for the chosen database must already be configured (see the
# Setup notebook).
pystatis.db.set_db("regio")

# %%
# Create a handle for the table; the data itself is fetched in the next cell.
t = pystatis.Table(name="12111-01-01-5-B")

# %%
# Download the table contents for the active database.
t.get_data()
Loading

0 comments on commit 6e060f7

Please sign in to comment.