Skip to content

Commit

Permalink
refactor code to implement new config; correct tests
Browse files Browse the repository at this point in the history
  • Loading branch information
pmayd committed Oct 28, 2023
1 parent c574d5f commit 857af8e
Show file tree
Hide file tree
Showing 14 changed files with 213 additions and 649 deletions.
23 changes: 8 additions & 15 deletions nb/00_Setup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"import dotenv\n",
"import pystatis\n",
"import pystatis.config\n",
"from pystatis.config import get_supported_db, config\n",
"\n",
"print(\"pystatis version: \", pystatis.__version__)\n",
"dotenv.load_dotenv()"
Expand All @@ -32,7 +33,7 @@
"metadata": {},
"outputs": [],
"source": [
" # only execute if you want to delete your config file for test purposes\n",
"# only execute if you want to delete your config file for test purposes\n",
"pystatis.config.delete_config()\n",
"pystatis.config.init_config()"
]
Expand Down Expand Up @@ -86,20 +87,12 @@
"source": [
"from pprint import pprint\n",
"\n",
"config = pystatis.config.load_config()\n",
"\n",
"for db in pystatis.config.get_supported_db():\n",
" for field in [\"username\", \"password\"]:\n",
" print(f\"{db} {field}: {config[db][field]}\")"
"for db in get_supported_db():\n",
" print(\"Database: \", db)\n",
" for k, v in config[db].items():\n",
" print(k, v)\n",
" print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "faa25df3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
8 changes: 1 addition & 7 deletions nb/01_Databases.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"metadata": {},
"outputs": [],
"source": [
"# expected to fail!\n",
"pystatdb.set_db(\"test\")"
]
},
Expand All @@ -47,13 +48,6 @@
"pystatdb.set_db(\"zensus\")\n",
"logincheck()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
20 changes: 14 additions & 6 deletions nb/cache.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@
"logging.basicConfig(level=logging.INFO)\n",
"\n",
"import pystatis\n",
"from pystatis import Cube, Table, init_config, clear_cache"
"from pystatis import Cube, Table, clear_cache\n",
"from pystatis.db import set_db"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c4b965e",
"id": "5c944d20",
"metadata": {},
"outputs": [],
"source": [
"# only run this if you haven't done so earlier\n",
"# init_config()"
"set_db(\"genesis\")"
]
},
{
Expand All @@ -34,7 +34,7 @@
"source": [
"# first, let's download two data sets\n",
"# these will be cached under your \n",
"Cube(name=\"22922KJ1141\").get_data()\n",
"#Cube(name=\"22922KJ1141\").get_data()\n",
"Table(name=\"21311-0001\").get_data()"
]
},
Expand All @@ -60,6 +60,14 @@
"# Or without a name to clear the whole cache at once\n",
"clear_cache()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7219b44b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -78,7 +86,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.11.5"
},
"vscode": {
"interpreter": {
Expand Down
17 changes: 14 additions & 3 deletions nb/cube.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,18 @@
"import logging\n",
"logging.basicConfig(level=logging.INFO)\n",
"\n",
"from pystatis import Cube"
"from pystatis import Cube\n",
"from pystatis.db import set_db"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "812c0e39",
"metadata": {},
"outputs": [],
"source": [
"set_db(\"genesis\")"
]
},
{
Expand All @@ -30,7 +41,7 @@
"metadata": {},
"outputs": [],
"source": [
"c.get_data()"
"c.get_data() # TODO: currently broken?"
]
},
{
Expand Down Expand Up @@ -105,7 +116,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.11.5"
},
"vscode": {
"interpreter": {
Expand Down
486 changes: 25 additions & 461 deletions nb/table.ipynb

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions src/pystatis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
print("Version:", pstat.__version__)
```
"""
import pystatis.cache
import pystatis.config
import pystatis.cube
import pystatis.db
import pystatis.find
import pystatis.helloworld
import pystatis.profile
import pystatis.table
from pystatis.cache import clear_cache
from pystatis.config import setup_credentials
from pystatis.cube import Cube
Expand Down
39 changes: 16 additions & 23 deletions src/pystatis/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@
import shutil
import zipfile
from datetime import date
from operator import attrgetter
from pathlib import Path
from typing import Optional

from pystatis.config import load_config
from pystatis.config import get_cache_dir

logger = logging.getLogger(__name__)

JOB_ID_PATTERN = r"\d+"


def cache_data(
cache_dir: Path,
cache_dir: str,
name: Optional[str],
params: dict,
data: str,
Expand All @@ -28,10 +30,8 @@ def cache_data(
This allows caching different results for different params.
Args:
cache_dir (Path): The cache directory as configured in the config.
cache_dir (str): The cache directory as configured in the config.
name (str): The unique identifier in GENESIS-Online.
endpoint (str): The endpoint for this data request.
method (str): The method for this data request.
params (dict): The dictionary holding the params for this data request.
data (str): The actual raw text data as returned by GENESIS-Online.
"""
Expand Down Expand Up @@ -73,8 +73,6 @@ def read_from_cache(
Args:
cache_dir (Path): The cache directory as configured in the config.
name (str): The unique identifier in GENESIS-Online.
endpoint (str): The endpoint for this data request.
method (str): The method for this data request.
params (dict): The dictionary holding the params for this data request.
Returns:
Expand All @@ -85,11 +83,11 @@ def read_from_cache(

data_dir = _build_file_path(cache_dir, name, params)

versions = sorted(
latest_version = sorted(
data_dir.glob("*"),
key=lambda path: int(path.stem),
)
file_name = versions[-1].name
key=attrgetter("stem"),
)[-1]
file_name = latest_version.name
file_path = data_dir / file_name
with zipfile.ZipFile(file_path, "r") as myzip:
with myzip.open(file_name.replace(".zip", ".txt")) as file:
Expand All @@ -98,15 +96,13 @@ def read_from_cache(
return data


def _build_file_path(cache_dir: Path, name: str, params: dict) -> Path:
def _build_file_path(cache_dir: str, name: str, params: dict) -> Path:
"""Builds a unique cache directory name from name and hashed params dictionary.
The way this method works is that it creates a path under cache dir that is unique
because the name is a unique EVAS identifier number in Destatis and the hash
is (close enough) unique to a given dictionary with query parameter values.
The way this method works is that it creates a path under cache dir that is unique because the name is a unique EVAS identifier number in Destatis and the hash is (close enough) unique to a given dictionary with query parameter values.
Args:
cache_dir (Path): The root cache directory as configured in the config.ini.
cache_dir (str): The root cache directory as configured in the config.ini.
name (str): The unique identifier for an object in Destatis.
params (dict): The query parameters for a given call to the Destatis API.
Expand All @@ -120,7 +116,7 @@ def _build_file_path(cache_dir: Path, name: str, params: dict) -> Path:
del params_["job"]
params_hash = hashlib.blake2s(digest_size=10, usedforsecurity=False)
params_hash.update(json.dumps(params_).encode("UTF-8"))
data_dir = cache_dir / name / params_hash.hexdigest()
data_dir = Path(cache_dir) / name / params_hash.hexdigest()

return data_dir

Expand All @@ -141,17 +137,15 @@ def normalize_name(name: str) -> str:


def hit_in_cash(
cache_dir: Path,
cache_dir: str,
name: Optional[str],
params: dict,
) -> bool:
"""Check if data is already cached.
Args:
cache_dir (Path): The cache directory as configured in the config.
cache_dir (str): The cache directory as configured in the config.
name (str): The unique identifier in GENESIS-Online.
endpoint (str): The endpoint for this data request.
method (str): The method for this data request.
params (dict): The dictionary holding the params for this data request.
Returns:
Expand All @@ -170,8 +164,7 @@ def clear_cache(name: Optional[str] = None) -> None:
Args:
name (str, optional): Unique name to be deleted from cached data.
"""
config = load_config()
cache_dir = Path(config["DATA"]["cache_dir"])
cache_dir = Path(get_cache_dir())

# remove specified file (directory) from the data cache
# or clear the complete cache (remove children, preserve the base directory)
Expand Down
Loading

0 comments on commit 857af8e

Please sign in to comment.