From df106766a9a72696d1703bd69cf8a96c86ff60a1 Mon Sep 17 00:00:00 2001 From: Marigold Date: Thu, 19 Dec 2024 10:45:19 +0100 Subject: [PATCH] :hammer: Add notebook for measuring execution times of steps --- scripts/execution_times.ipynb | 407 ++++++++++++++++++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 scripts/execution_times.ipynb diff --git a/scripts/execution_times.ipynb b/scripts/execution_times.ipynb new file mode 100644 index 00000000000..08f4fb28038 --- /dev/null +++ b/scripts/execution_times.ipynb @@ -0,0 +1,407 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load execution times" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m2024-12-19 10:33:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfile.downloaded \u001b[0m \u001b[36mpath\u001b[0m=\u001b[35mPosixPath('/var/folders/0s/2yqr44dj44zcmyzdrf8fvxyc0000gn/T/tmpxyijvoya')\u001b[0m \u001b[36mtarget\u001b[0m=\u001b[35mowid@etl-prod-2:~/etl/.execution_time.json\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "(4659,)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "from etl import files\n", + "import tempfile\n", + "import json\n", + "from pathlib import Path\n", + "\n", + "# Download into a throwaway directory so no temporary file is left behind on disk.\n", + "with tempfile.TemporaryDirectory() as temp_dir:\n", + "    temp_file_path = Path(temp_dir) / \"execution_time.json\"\n", + "    files.download_file_from_server(temp_file_path, \"owid@etl-prod-2:~/etl/.execution_time.json\")\n", + "    # Parse before leaving the block: the directory and the download are deleted on exit.\n", + "    with open(temp_file_path, 'r') as f:\n", + "        data = json.load(f)\n", + "\n", + "data = pd.Series(data)\n", + "data.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Longest steps" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data":
{ + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 t
grapher://grapher/who/2021-07-01/ghe3547
data-private://garden/ihme_gbd/2024-05-20/gbd_prevalence1625
grapher://grapher/ihme_gbd/2019/gbd_prevalence1328
data://garden/un/2022-07-11/un_wpp1237
data://garden/faostat/2024-03-14/faostat_fbsc1224
data://meadow/un/2022-07-11/un_wpp1191
data-private://garden/ihme_gbd/2024-05-20/gbd_cause1090
grapher://grapher/ihme_gbd/2019/gbd_cause970
grapher://grapher/ihme_gbd/2024-05-20/gbd_risk934
grapher://grapher/ihme_gbd/2024-05-20/gbd_prevalence850
grapher://grapher/un/2024-08-27/un_sdg836
data://garden/who/2024-07-30/ghe832
data://garden/who/2022-09-30/ghe798
data://meadow/demography/2024-12-06/wittgenstein_human_capital_historical768
grapher://grapher/ihme_gbd/2024-05-20/gbd_child_mortality741
data-private://meadow/ihme_gbd/2024-05-20/gbd_cause731
data://meadow/faostat/2024-03-14/faostat_ti632
data://meadow/faostat/2024-03-14/faostat_tcl611
data-private://garden/ihme_gbd/2024-05-20/gbd_risk601
grapher://grapher/ihme_gbd/2024-05-20/gbd_cause583
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sort_values(ascending=False).head(20).to_frame('t').style.format('{:.0f}').bar(color='orange')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Time per step type & channel" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 t
channel 
grapher://grapher21617
data://garden19388
data://meadow14428
data://grapher4956
backport://backport1518
data://explorers1011
data://open_numbers38
export://multidim15
data://external12
data://examples6
export://github5
github://open-numbers2
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = data.to_frame(name='t').reset_index().rename(columns={'index': 'step'})  # one row per step\n", + "df['channel'] = df['step'].str.split('/').str[:3].str.join('/').str.replace('-private', '')  # keep the first three '/'-separated parts and drop '-private' so both variants share a bucket\n", + "df = df.loc[~df['channel'].str.contains('snapshot://')]  # leave snapshot steps out of the totals\n", + "df.groupby('channel')['t'].sum().sort_values(ascending=False).to_frame('t').style.format('{:.0f}').bar(color='orange')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}