From 26a8c05e0fce88d7abba67b4afcdd0740cd2b890 Mon Sep 17 00:00:00 2001 From: Xavier Coubez Date: Sat, 23 Oct 2021 18:13:31 +0200 Subject: [PATCH 1/6] Adding lumisections - first look at views --- lumisections/admin.py | 5 +++ lumisections/apps.py | 6 +++ .../commands/extract_lumisections.py | 39 +++++++++++++++++++ lumisections/models.py | 11 ++++++ lumisections/views.py | 3 ++ mlp/settings.py | 18 +++++---- mlp/urls.py | 2 + .../management/commands/extract_run_histos.py | 1 - run_histos/templates/run_histos/main.html | 35 +++++++++++++++++ run_histos/urls.py | 8 ++++ run_histos/views.py | 30 ++++++++++++++ runs/models.py | 2 +- runs/templates/runs/main.html | 8 +--- runs/views.py | 4 +- scripts/step1_extract_runs.sh | 2 +- scripts/step2_extract_run_histos.sh | 2 +- scripts/step3_extract_lumisections.sh | 16 ++++++++ static_project/main.js | 6 +++ static_project/style.css | 0 templates/base.html | 31 +++++++++++++++ templates/navbar.html | 2 + 21 files changed, 212 insertions(+), 19 deletions(-) create mode 100644 lumisections/admin.py create mode 100644 lumisections/apps.py create mode 100644 lumisections/management/commands/extract_lumisections.py create mode 100644 lumisections/models.py create mode 100644 lumisections/views.py create mode 100644 run_histos/templates/run_histos/main.html create mode 100644 run_histos/urls.py create mode 100644 scripts/step3_extract_lumisections.sh create mode 100644 static_project/main.js create mode 100644 static_project/style.css create mode 100644 templates/base.html create mode 100644 templates/navbar.html diff --git a/lumisections/admin.py b/lumisections/admin.py new file mode 100644 index 0000000..ac2b267 --- /dev/null +++ b/lumisections/admin.py @@ -0,0 +1,5 @@ +from django.contrib import admin +from .models import Lumisection + +# Register your models here. +admin.site.register(Lumisection) diff --git a/lumisections/apps.py b/lumisections/apps.py new file mode 100644 index 0000000..fcdc9d6 --- /dev/null +++ b/lumisections/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class LumisectionsConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'lumisections' diff --git a/lumisections/management/commands/extract_lumisections.py b/lumisections/management/commands/extract_lumisections.py new file mode 100644 index 0000000..4ed46ed --- /dev/null +++ b/lumisections/management/commands/extract_lumisections.py @@ -0,0 +1,39 @@ +from django.core.management.base import BaseCommand + +from runs.models import Run +from lumisections.models import Lumisection + +# https://betterprogramming.pub/3-techniques-for-importing-large-csv-files-into-a-django-app-2b6e5e47dba0 +import pandas as pd + +class Command(BaseCommand): + help = "Extracts lumisections from files" + + def add_arguments(self, parser): + parser.add_argument("file_path", type=str) + + def handle(self, *args, **options): + file_path = options["file_path"] + + df = pd.read_csv(file_path) + + lumisections = [] + + for index, row in df.iterrows(): + run_number = row["fromrun"] + lumi_number = row["fromlumi"] + #print(run_number, lumi_number) + + run, _ = Run.objects.get_or_create(run_number=run_number) + + lumisection = Lumisection( + run_number = run, + ls_number = lumi_number, + ) + + lumisections.append(lumisection) + + Lumisection.objects.bulk_create(lumisections) + print(f'lumisections successfully added!') + + diff --git a/lumisections/models.py b/lumisections/models.py new file mode 100644 index 0000000..75d82ef --- /dev/null +++ b/lumisections/models.py @@ -0,0 +1,11 @@ +from django.db import models +from runs.models import Run + +# Create your models here. +class Lumisection(models.Model): + ls_number = models.IntegerField() + run_number = models.ForeignKey(Run, on_delete=models.CASCADE) + date = models.DateTimeField(auto_now_add=True) + + def __str__(self): + return f"run {self.run_number.run_number} / lumisection {self.ls_number}" diff --git a/lumisections/views.py b/lumisections/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/lumisections/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. diff --git a/mlp/settings.py b/mlp/settings.py index 9f6741c..203267f 100644 --- a/mlp/settings.py +++ b/mlp/settings.py @@ -10,13 +10,11 @@ https://docs.djangoproject.com/en/3.2/ref/settings/ """ -import os - +from pathlib import Path from decouple import config # Build paths inside the project like this: BASE_DIR / 'subdir'. -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - +BASE_DIR = Path(__file__).resolve().parent.parent # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/ @@ -53,6 +51,7 @@ 'runs.apps.RunsConfig', 'run_histos.apps.RunHistosConfig', + 'lumisections', ] MIDDLEWARE = [ @@ -71,7 +70,7 @@ TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [os.path.join(BASE_DIR, "templates")], + 'DIRS': [BASE_DIR / 'templates'], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ @@ -132,8 +131,13 @@ # https://docs.djangoproject.com/en/3.2/howto/static-files/ STATIC_URL = '/static/' -#STATICFILES_DIRS = (os.path.join(BASE_DIR, 'home/static'),) -STATIC_ROOT = os.path.join(BASE_DIR, 'static') + +STATICFILES_DIRS = [ BASE_DIR / 'static_project' ] + +STATIC_ROOT = BASE_DIR / 'static_cdn/static_root' + +MEDIA_URL = '/media/' +MEDIA_ROOT = BASE_DIR / 'static_cdn/media_root' # Default primary key field type # https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field diff --git a/mlp/urls.py b/mlp/urls.py index 50ba8ee..1261efe 100644 --- a/mlp/urls.py +++ b/mlp/urls.py @@ -19,5 +19,7 @@ urlpatterns = [ path("", include("home.urls")), path("datasets/", include("datasets.urls")), + path("runs/", include("runs.urls")), + path("run_histos/", include("run_histos.urls")), path('admin/', admin.site.urls), ] diff --git a/run_histos/management/commands/extract_run_histos.py b/run_histos/management/commands/extract_run_histos.py index 8e59e8f..603871d 100644 --- a/run_histos/management/commands/extract_run_histos.py +++ b/run_histos/management/commands/extract_run_histos.py @@ -20,7 +20,6 @@ def handle(self, *args, **options): # extracting and creating run number (ForeignKey of histo1DRun) run_number = df.run.unique()[0] - print(type(run_number)) run, _ = Run.objects.get_or_create(run_number=run_number) print(f'run {run_number} successfully added!') diff --git a/run_histos/templates/run_histos/main.html b/run_histos/templates/run_histos/main.html new file mode 100644 index 0000000..d49b98f --- /dev/null +++ b/run_histos/templates/run_histos/main.html @@ -0,0 +1,35 @@ +{% extends "base.html" %} + +{% block title %} Run Histos {% endblock title %} + +{% block content %} + +
+ {% if error_message %} + {{ error_message }} + {% endif %} + +
+ {% csrf_token %} +
+ + +
+ +
+ +
+ +
+ +{{ df | safe }} + +{% endblock content %} diff --git a/run_histos/urls.py b/run_histos/urls.py new file mode 100644 index 0000000..0a1acd0 --- /dev/null +++ b/run_histos/urls.py @@ -0,0 +1,8 @@ +from django.urls import path +from .views import chart_select_view + +app_name = 'run_histos' + +urlpatterns = [ + path('', chart_select_view, name='main-runhistos-view') +] diff --git a/run_histos/views.py b/run_histos/views.py index 91ea44a..a93f724 100644 --- a/run_histos/views.py +++ b/run_histos/views.py @@ -1,3 +1,33 @@ from django.shortcuts import render +from runs.models import Run +from .models import RunHisto + +import pandas as pd # Create your views here. + +def chart_select_view(request): + + error_message = None + + # objects.all().values() provides a dictionary while objects.all().values_list() provides a tuple + runs_df = pd.DataFrame(Run.objects.all().values()) + runhistos_df = pd.DataFrame(RunHisto.objects.all().values()) + + if runhistos_df.shape[0] > 0: + df = pd.merge(runs_df, runhistos_df, left_on='id', right_on='run_id').drop(['id_x', 'id_y', 'run_id', 'date_x', 'date_y'], axis=1).head(20) + + if request.method == 'POST': + print(f"request.POST is {request.POST}") + histogram = request.POST['histogram'] + print(f"histogram: {histogram}") + + else: + error_message = "No records in the database" + + context = { + 'error_message': error_message, + 'df': df.to_html(), + } + + return render(request, 'run_histos/main.html', context) diff --git a/runs/models.py b/runs/models.py index b8302ea..0ddfcc3 100644 --- a/runs/models.py +++ b/runs/models.py @@ -6,4 +6,4 @@ class Run(models.Model): date = models.DateTimeField(auto_now_add=True) def __str__(self): - return f"run {run_number}" + return f"run {self.run_number}" diff --git a/runs/templates/runs/main.html b/runs/templates/runs/main.html index 08f28dd..a3a4c6d 100644 --- a/runs/templates/runs/main.html +++ b/runs/templates/runs/main.html @@ -1,11 +1,7 @@ -{% extends 'base.html' %} +{% extends "base.html" %} {% block title %} Runs {% endblock title %} {% block content %} - -

Welcome to the run view

-{{runs}} - +{{ runs | safe }} {% endblock content %} - diff --git a/runs/views.py b/runs/views.py index 80ce6b4..9fd72cf 100644 --- a/runs/views.py +++ b/runs/views.py @@ -7,8 +7,8 @@ def chart_select_view(request): # objects.all().values() provides a dictionary while objects.all().values_list() provides a tuple - runs_df = pd.DataFrame(Run.objects.all().values()) + runs_df = pd.DataFrame(Run.objects.all().values()).drop(['id'], axis=1) context = { - 'runs': runs_df, + 'runs': runs_df.to_html(), } return render(request, 'runs/main.html', context) diff --git a/scripts/step1_extract_runs.sh b/scripts/step1_extract_runs.sh index b4ff3a7..2e03b75 100644 --- a/scripts/step1_extract_runs.sh +++ b/scripts/step1_extract_runs.sh @@ -12,5 +12,5 @@ for f in $FILES do echo "Processing $f file..." - #python manage.py extract_runs f + #python manage.py extract_runs $f done diff --git a/scripts/step2_extract_run_histos.sh b/scripts/step2_extract_run_histos.sh index c802749..cf86f91 100644 --- a/scripts/step2_extract_run_histos.sh +++ b/scripts/step2_extract_run_histos.sh @@ -12,5 +12,5 @@ for f in $FILES do echo "Processing $f file..." - #python manage.py extract_run_histos f + python manage.py extract_run_histos $f done diff --git a/scripts/step3_extract_lumisections.sh b/scripts/step3_extract_lumisections.sh new file mode 100644 index 0000000..502b658 --- /dev/null +++ b/scripts/step3_extract_lumisections.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +cd .. + +# testing one file +python manage.py extract_lumisections /eos/project/c/cmsml4dc/ML_2020/UL2018_Data/DF2018A_1D_Complete/ZeroBias_2018A_DataFrame_1D_1.csv + +FILES="/eos/project/c/cmsml4dc/ML_2020/UL2018_Data/DF2018A_1D_Complete/*" + +# extracting from all files +for f in $FILES + +do + echo "Processing $f file..." + #python manage.py extract_lumisections $f +done diff --git a/static_project/main.js b/static_project/main.js new file mode 100644 index 0000000..1fc70d7 --- /dev/null +++ b/static_project/main.js @@ -0,0 +1,6 @@ +$(document).ready(function(){ + $('.ui.dropdown') + .dropdown() +; +}) + diff --git a/static_project/style.css b/static_project/style.css new file mode 100644 index 0000000..e69de29 diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000..4ca80a2 --- /dev/null +++ b/templates/base.html @@ -0,0 +1,31 @@ +{% load static %} + + + + + + + + + + + + + + + + + ML Playground - {% block title %}{% endblock title %} + + + + {% include 'navbar.html' %} +
+ {% block content %} + {% endblock content %} +
+ + + + + diff --git a/templates/navbar.html b/templates/navbar.html new file mode 100644 index 0000000..218633a --- /dev/null +++ b/templates/navbar.html @@ -0,0 +1,2 @@ +

Navbar

+ From c97bf38cf1b30e628e9b8800ab6963bc7019cf73 Mon Sep 17 00:00:00 2001 From: Xavier Coubez Date: Mon, 25 Oct 2021 11:43:22 +0200 Subject: [PATCH 2/6] First look at altair and vega embedding --- run_histos/templates/run_histos/main.html | 48 ++++++++++++++++++++++- run_histos/urls.py | 5 ++- run_histos/views.py | 19 ++++++++- templates/base.html | 13 +++++- 4 files changed, 77 insertions(+), 8 deletions(-) diff --git a/run_histos/templates/run_histos/main.html b/run_histos/templates/run_histos/main.html index d49b98f..43bbead 100644 --- a/run_histos/templates/run_histos/main.html +++ b/run_histos/templates/run_histos/main.html @@ -9,12 +9,28 @@ {{ error_message }} {% endif %} +

Choosing Dataset / Variable / Plot type

+
{% csrf_token %}
- + + +
+
+
+
+ + +
+
+ + + + + +

Here should be an altair plot...

+ +
+ + +
{{ df | safe }} diff --git a/run_histos/urls.py b/run_histos/urls.py index 0a1acd0..ead9ba6 100644 --- a/run_histos/urls.py +++ b/run_histos/urls.py @@ -1,8 +1,9 @@ from django.urls import path -from .views import chart_select_view +from .views import chart_select_view, chart_view_altair app_name = 'run_histos' urlpatterns = [ - path('', chart_select_view, name='main-runhistos-view') + path('', chart_select_view, name='main-runhistos-view'), + path('altair/', chart_view_altair, name='altair-view') ] diff --git a/run_histos/views.py b/run_histos/views.py index a93f724..28a748a 100644 --- a/run_histos/views.py +++ b/run_histos/views.py @@ -1,8 +1,13 @@ from django.shortcuts import render +from django.http import JsonResponse + from runs.models import Run from .models import RunHisto import pandas as pd +import altair as alt + +from vega_datasets import data # Create your views here. @@ -19,8 +24,10 @@ def chart_select_view(request): if request.method == 'POST': print(f"request.POST is {request.POST}") - histogram = request.POST['histogram'] - print(f"histogram: {histogram}") + dataset = request.POST['dataset'] + variable = request.POST['variable'] + plot_type = request.POST['plot_type'] + print(f"dataset: {dataset} / variable: {variable} / plot_type: {plot_type}") else: error_message = "No records in the database" @@ -31,3 +38,11 @@ def chart_select_view(request): } return render(request, 'run_histos/main.html', context) + +def chart_view_altair(request): + runhistos_df = pd.DataFrame(RunHisto.objects.all().values()).head(100) + chart_obj = alt.Chart(runhistos_df).mark_bar().encode( + x='mean', + ).to_json(indent=None) + + return JsonResponse(chart_obj) diff --git a/templates/base.html b/templates/base.html index 4ca80a2..cb2afd3 100644 --- a/templates/base.html +++ b/templates/base.html @@ -3,6 +3,7 @@ + @@ -13,19 +14,27 @@ - ML Playground - {% block title %}{% endblock title %} + + {% include 'navbar.html' %} +
{% block content %} {% endblock content %}
- + + + + + + + From e045cc7c9094158e1643070b4e2367802315b666 Mon Sep 17 00:00:00 2001 From: Xavier Coubez Date: Mon, 25 Oct 2021 12:21:42 +0200 Subject: [PATCH 3/6] Successfully added first altair histogram to run_histos view --- run_histos/templates/run_histos/main.html | 10 +++------- run_histos/views.py | 8 ++++++-- templates/base.html | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/run_histos/templates/run_histos/main.html b/run_histos/templates/run_histos/main.html index 43bbead..8215f50 100644 --- a/run_histos/templates/run_histos/main.html +++ b/run_histos/templates/run_histos/main.html @@ -58,16 +58,12 @@

Choosing Dataset / Variable / Plot type


- - - - -

Here should be an altair plot...

+

Histogram

- diff --git a/run_histos/views.py b/run_histos/views.py index 28a748a..b117084 100644 --- a/run_histos/views.py +++ b/run_histos/views.py @@ -32,9 +32,14 @@ def chart_select_view(request): else: error_message = "No records in the database" + chart = alt.Chart(runhistos_df.head(100)).mark_bar().encode( + x='mean', + ).to_json(indent=None) + context = { 'error_message': error_message, - 'df': df.to_html(), + 'df': df.to_html(), + 'chart' : chart, } return render(request, 'run_histos/main.html', context) @@ -44,5 +49,4 @@ def chart_view_altair(request): chart_obj = alt.Chart(runhistos_df).mark_bar().encode( x='mean', ).to_json(indent=None) - return JsonResponse(chart_obj) diff --git a/templates/base.html b/templates/base.html index cb2afd3..5d16cdb 100644 --- a/templates/base.html +++ b/templates/base.html @@ -14,7 +14,11 @@ - + + + + + ML Playground - {% block title %}{% endblock title %} @@ -30,10 +34,6 @@ - - - - From 9aaf02a3535eda64ca0e0f3724a4b8e4cac73ee9 Mon Sep 17 00:00:00 2001 From: Xavier Coubez Date: Mon, 25 Oct 2021 18:58:11 +0200 Subject: [PATCH 4/6] Modifying histogram extraction to deal with various datasets - Adding project visualization --- run_histos/management/commands/extract_run_histos.py | 12 +++++++++--- scripts/project_visualization.sh | 4 ++++ scripts/step2_extract_run_histos.sh | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 scripts/project_visualization.sh diff --git a/run_histos/management/commands/extract_run_histos.py b/run_histos/management/commands/extract_run_histos.py index 603871d..a271c2c 100644 --- a/run_histos/management/commands/extract_run_histos.py +++ b/run_histos/management/commands/extract_run_histos.py @@ -14,17 +14,23 @@ def add_arguments(self, parser): def handle(self, *args, **options): file_path = options["file_path"] - + split_file_path = file_path.replace('.csv', "").split('/')[-1].split('_') + print(split_file_path) + # opening per run file from ML4DQM df = pd.read_csv(file_path) # extracting and creating run number (ForeignKey of histo1DRun) - run_number = df.run.unique()[0] + dataset = split_file_path[0] + run_number = split_file_path[1] + workspace = 'TrackParameters/generalTracks/GeneralProperties' + tag = 'generalTracks' + run, _ = Run.objects.get_or_create(run_number=run_number) print(f'run {run_number} successfully added!') # extracting set of histograms corresponding to the run - dataset = 'ZeroBias' + dataset = split_file_path[0] workspace = 'TrackParameters/generalTracks/GeneralProperties' tag = 'generalTracks' diff --git a/scripts/project_visualization.sh b/scripts/project_visualization.sh new file mode 100644 index 0000000..bb368d8 --- /dev/null +++ b/scripts/project_visualization.sh @@ -0,0 +1,4 @@ +cd .. + +./manage.py graph_models -a -g -o ad_project.png +./manage.py graph_models -a -I Run,RunHisto,Lumisection -o ad_project_classes.png diff --git a/scripts/step2_extract_run_histos.sh b/scripts/step2_extract_run_histos.sh index cf86f91..edb4ca8 100644 --- a/scripts/step2_extract_run_histos.sh +++ b/scripts/step2_extract_run_histos.sh @@ -5,7 +5,7 @@ cd .. # testing one file python manage.py extract_run_histos /eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBias_315257_UL2018.csv -FILES="/eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBia*" +FILES="/eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/*" # extracting from all files for f in $FILES From 710b69d6fbdad3ff3c82cade3598e8ac4fff1fe2 Mon Sep 17 00:00:00 2001 From: Xavier Coubez Date: Tue, 26 Oct 2021 11:40:16 +0200 Subject: [PATCH 5/6] Updating scripts and prototyping cross-filtering using altair --- notebook.ipynb | 732 +++++++++++++++++++++++++- scripts/project_notebook.sh | 5 + scripts/project_visualization.sh | 2 + scripts/step1_extract_runs.sh | 4 +- scripts/step2_extract_run_histos.sh | 4 +- scripts/step3_extract_lumisections.sh | 4 +- 6 files changed, 738 insertions(+), 13 deletions(-) create mode 100644 scripts/project_notebook.sh diff --git a/notebook.ipynb b/notebook.ipynb index ce0d3f9..3c11177 100644 --- a/notebook.ipynb +++ b/notebook.ipynb @@ -28,6 +28,34 @@ { "cell_type": "code", "execution_count": 3, + "id": "0162a7cc", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import altair as alt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "253b0e45", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "786ff643", + "metadata": {}, + "source": [ + "## Checking apps" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "64de1c6d", "metadata": {}, "outputs": [], @@ -37,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "b056afe4", "metadata": {}, "outputs": [ @@ -46,10 +74,10 @@ "text/plain": [ "{'id': 1,\n", " 'run_number': 315257,\n", - " 'date': datetime.datetime(2021, 10, 22, 12, 35, 30, 551814, tzinfo=)}" + " 'date': datetime.datetime(2021, 10, 25, 15, 37, 22, 290978, tzinfo=)}" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -60,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "a897f790", "metadata": {}, "outputs": [], @@ -70,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "bd9589f5", "metadata": {}, "outputs": [ @@ -79,7 +107,7 @@ "text/plain": [ "{'id': 1,\n", " 'run_id': 1,\n", - " 'date': datetime.datetime(2021, 10, 22, 12, 35, 52, 542313, tzinfo=),\n", + " 'date': datetime.datetime(2021, 10, 25, 15, 37, 22, 710167, tzinfo=),\n", " 'primary_dataset': 'ZeroBias',\n", " 'path': 'TrackParameters/generalTracks/GeneralProperties',\n", " 'title': 'AbsDistanceOfClosestApproachToBS_GenTk',\n", @@ -90,7 +118,7 @@ " 'kurtosis': 35.49796721670949}" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -106,6 +134,696 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "id": "2f876830", + "metadata": {}, + "source": [ + "## Converting to dataframes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "76e78f5f", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(run_histos)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3a0284f1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idrun_iddateprimary_datasetpathtitleentriesmeanrmsskewnesskurtosis
0112021-10-25 15:37:22.710167+00:00ZeroBiasTrackParameters/generalTracks/GeneralPropertiesAbsDistanceOfClosestApproachToBS_GenTk344303870.4408992.7660853.18444335.497967
1212021-10-25 15:37:22.710269+00:00ZeroBiasTrackParameters/generalTracks/GeneralPropertiesChi2Prob_GenTk344303870.4976230.322158-0.089739-1.339663
2312021-10-25 15:37:22.710310+00:00ZeroBiasTrackParameters/generalTracks/GeneralPropertiesChi2_GenTk3443038718.86258666.9643650.046712-2.909550
3412021-10-25 15:37:22.710343+00:00ZeroBiasTrackParameters/generalTracks/GeneralPropertiesChi2oNDFVsEta_ImpactPoint_GenTk34421103-0.1033961.5863030.264605-0.090844
4512021-10-25 15:37:22.710387+00:00ZeroBiasTrackParameters/generalTracks/GeneralPropertiesChi2oNDFVsNHits_ImpactPoint_GenTk3442110311.3218036.70838238.499120175.574616
\n", + "
" + ], + "text/plain": [ + " id run_id date primary_dataset \\\n", + "0 1 1 2021-10-25 15:37:22.710167+00:00 ZeroBias \n", + "1 2 1 2021-10-25 15:37:22.710269+00:00 ZeroBias \n", + "2 3 1 2021-10-25 15:37:22.710310+00:00 ZeroBias \n", + "3 4 1 2021-10-25 15:37:22.710343+00:00 ZeroBias \n", + "4 5 1 2021-10-25 15:37:22.710387+00:00 ZeroBias \n", + "\n", + " path \\\n", + "0 TrackParameters/generalTracks/GeneralProperties \n", + "1 TrackParameters/generalTracks/GeneralProperties \n", + "2 TrackParameters/generalTracks/GeneralProperties \n", + "3 TrackParameters/generalTracks/GeneralProperties \n", + "4 TrackParameters/generalTracks/GeneralProperties \n", + "\n", + " title entries mean rms \\\n", + "0 AbsDistanceOfClosestApproachToBS_GenTk 34430387 0.440899 2.766085 \n", + "1 Chi2Prob_GenTk 34430387 0.497623 0.322158 \n", + "2 Chi2_GenTk 34430387 18.862586 66.964365 \n", + "3 Chi2oNDFVsEta_ImpactPoint_GenTk 34421103 -0.103396 1.586303 \n", + "4 Chi2oNDFVsNHits_ImpactPoint_GenTk 34421103 11.321803 6.708382 \n", + "\n", + " skewness kurtosis \n", + "0 3.184443 35.497967 \n", + "1 -0.089739 -1.339663 \n", + "2 0.046712 -2.909550 \n", + "3 0.264605 -0.090844 \n", + "4 38.499120 175.574616 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31f284d1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "1ba4f204", + "metadata": {}, + "source": [ + "## Filtering" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "89c1c27d", + "metadata": {}, + "outputs": [], + "source": [ + "select_title = ['Chi2Prob_GenTk', 'Chi2_GenTk']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "20786fd3", + "metadata": {}, + "outputs": [], + "source": [ + "df_filtered = df[df['title'].isin(select_title)][['primary_dataset', 'run_id', 'title', 'mean']]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f21db4be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primary_datasetrun_idtitlemean
1ZeroBias1Chi2Prob_GenTk0.497623
2ZeroBias1Chi2_GenTk18.862586
97EGamma2Chi2Prob_GenTk0.492466
98EGamma2Chi2_GenTk19.083402
180EGamma3Chi2Prob_GenTk0.486288
...............
57534JetHT151Chi2_GenTk19.614357
57616JetHT153Chi2Prob_GenTk0.473647
57617JetHT153Chi2_GenTk19.476241
57699JetHT154Chi2Prob_GenTk0.442081
57700JetHT154Chi2_GenTk20.097355
\n", + "

1392 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " primary_dataset run_id title mean\n", + "1 ZeroBias 1 Chi2Prob_GenTk 0.497623\n", + "2 ZeroBias 1 Chi2_GenTk 18.862586\n", + "97 EGamma 2 Chi2Prob_GenTk 0.492466\n", + "98 EGamma 2 Chi2_GenTk 19.083402\n", + "180 EGamma 3 Chi2Prob_GenTk 0.486288\n", + "... ... ... ... ...\n", + "57534 JetHT 151 Chi2_GenTk 19.614357\n", + "57616 JetHT 153 Chi2Prob_GenTk 0.473647\n", + "57617 JetHT 153 Chi2_GenTk 19.476241\n", + "57699 JetHT 154 Chi2Prob_GenTk 0.442081\n", + "57700 JetHT 154 Chi2_GenTk 20.097355\n", + "\n", + "[1392 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_filtered" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a890359c", + "metadata": {}, + "outputs": [], + "source": [ + "df_filtered = df_filtered.pivot(index=['primary_dataset', 'run_id'], columns='title', values='mean').reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5b78479c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleprimary_datasetrun_idChi2Prob_GenTkChi2_GenTk
0EGamma20.49246619.083402
1EGamma30.48628819.146579
2EGamma40.49979119.027085
3EGamma50.48909519.103232
4EGamma60.48394719.190387
...............
691JetHT1540.44208120.097355
692JetHT5420.49527119.129277
693JetHT5430.46920419.599869
694JetHT5440.47870719.342482
695ZeroBias10.49762318.862586
\n", + "

696 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + "title primary_dataset run_id Chi2Prob_GenTk Chi2_GenTk\n", + "0 EGamma 2 0.492466 19.083402\n", + "1 EGamma 3 0.486288 19.146579\n", + "2 EGamma 4 0.499791 19.027085\n", + "3 EGamma 5 0.489095 19.103232\n", + "4 EGamma 6 0.483947 19.190387\n", + ".. ... ... ... ...\n", + "691 JetHT 154 0.442081 20.097355\n", + "692 JetHT 542 0.495271 19.129277\n", + "693 JetHT 543 0.469204 19.599869\n", + "694 JetHT 544 0.478707 19.342482\n", + "695 ZeroBias 1 0.497623 18.862586\n", + "\n", + "[696 rows x 4 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_filtered" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "19e79e5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['EGamma', 'JetHT', 'ZeroBias'], dtype=object)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_filtered.primary_dataset.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e70de59", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "dad6535a", + "metadata": {}, + "source": [ + "## Testing cross-filtering with altair" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e2ce2510", + "metadata": {}, + "outputs": [], + "source": [ + "source = df_filtered" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "bb329f3d", + "metadata": {}, + "outputs": [], + "source": [ + "brush_EGamma = alt.selection(type='interval')\n", + "brush_JetHT = alt.selection(type='interval')\n", + "brush_ZeroBias = alt.selection(type='interval')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b5a40a03", + "metadata": {}, + "outputs": [], + "source": [ + "EGamma = alt.Chart(source[source['primary_dataset']=='EGamma']).mark_point().encode(\n", + " x='Chi2Prob_GenTk:Q',\n", + " y='Chi2_GenTk:Q',\n", + " color=alt.condition(brush_EGamma|brush_JetHT|brush_ZeroBias, 'primary_dataset', alt.value('lightgray'))\n", + ").add_selection(\n", + " brush_EGamma\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "28351c10", + "metadata": {}, + "outputs": [], + "source": [ + "JetHT = alt.Chart(source[source['primary_dataset']=='JetHT']).mark_point().encode(\n", + " x='Chi2Prob_GenTk:Q',\n", + " y='Chi2_GenTk:Q',\n", + " color=alt.condition(brush_EGamma|brush_JetHT|brush_ZeroBias, 'primary_dataset', alt.value('lightgray'))\n", + ").add_selection(\n", + " brush_JetHT\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bc80295e", + "metadata": {}, + "outputs": [], + "source": [ + "ZeroBias = alt.Chart(source[source['primary_dataset']=='ZeroBias']).mark_point().encode(\n", + " x='Chi2Prob_GenTk:Q',\n", + " y='Chi2_GenTk:Q',\n", + " color=alt.condition(brush_EGamma|brush_JetHT|brush_ZeroBias, 'primary_dataset', alt.value('lightgray'))\n", + ").add_selection(\n", + " brush_ZeroBias\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "7f3a3588", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.VConcatChart(...)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "EGamma & JetHT & ZeroBias" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b74199fb", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/scripts/project_notebook.sh b/scripts/project_notebook.sh new file mode 100644 index 0000000..480d4ef --- /dev/null +++ b/scripts/project_notebook.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +cd .. + +./manage.py shell_plus --notebook diff --git a/scripts/project_visualization.sh b/scripts/project_visualization.sh index bb368d8..f9e11d0 100644 --- a/scripts/project_visualization.sh +++ b/scripts/project_visualization.sh @@ -1,3 +1,5 @@ +#!/bin/bash + cd .. ./manage.py graph_models -a -g -o ad_project.png diff --git a/scripts/step1_extract_runs.sh b/scripts/step1_extract_runs.sh index 2e03b75..f6f6c2f 100644 --- a/scripts/step1_extract_runs.sh +++ b/scripts/step1_extract_runs.sh @@ -3,7 +3,7 @@ cd .. # testing one file -python manage.py extract_runs /eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBias_315257_UL2018.csv +./manage.py extract_runs /eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBias_315257_UL2018.csv FILES="/eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBia*" @@ -12,5 +12,5 @@ for f in $FILES do echo "Processing $f file..." - #python manage.py extract_runs $f + #./manage.py extract_runs $f done diff --git a/scripts/step2_extract_run_histos.sh b/scripts/step2_extract_run_histos.sh index edb4ca8..9810646 100644 --- a/scripts/step2_extract_run_histos.sh +++ b/scripts/step2_extract_run_histos.sh @@ -3,7 +3,7 @@ cd .. # testing one file -python manage.py extract_run_histos /eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBias_315257_UL2018.csv +#./manage.py extract_run_histos /eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBias_315257_UL2018.csv FILES="/eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/*" @@ -12,5 +12,5 @@ for f in $FILES do echo "Processing $f file..." - python manage.py extract_run_histos $f + ./python manage.py extract_run_histos $f done diff --git a/scripts/step3_extract_lumisections.sh b/scripts/step3_extract_lumisections.sh index 502b658..24e0432 100644 --- a/scripts/step3_extract_lumisections.sh +++ b/scripts/step3_extract_lumisections.sh @@ -3,7 +3,7 @@ cd .. # testing one file -python manage.py extract_lumisections /eos/project/c/cmsml4dc/ML_2020/UL2018_Data/DF2018A_1D_Complete/ZeroBias_2018A_DataFrame_1D_1.csv +./manage.py extract_lumisections /eos/project/c/cmsml4dc/ML_2020/UL2018_Data/DF2018A_1D_Complete/ZeroBias_2018A_DataFrame_1D_1.csv FILES="/eos/project/c/cmsml4dc/ML_2020/UL2018_Data/DF2018A_1D_Complete/*" @@ -12,5 +12,5 @@ for f in $FILES do echo "Processing $f file..." - #python manage.py extract_lumisections $f + #./manage.py extract_lumisections $f done From fcc523c10a58d97688e4d20b4e8117ad6f97f159 Mon Sep 17 00:00:00 2001 From: Xavier Coubez Date: Tue, 26 Oct 2021 19:16:15 +0200 Subject: [PATCH 6/6] Finishing prototype of runhistos view --- notebook.ipynb | 334 ++++++++++++---------- run_histos/templates/run_histos/main.html | 6 +- run_histos/urls.py | 4 +- run_histos/views.py | 54 +++- runs/templates/runs/main.html | 8 + runs/templates/runs/run.html | 9 + runs/urls.py | 5 +- runs/views.py | 23 +- scripts/step2_extract_run_histos.sh | 4 +- 9 files changed, 262 insertions(+), 185 deletions(-) create mode 100644 runs/templates/runs/run.html diff --git a/notebook.ipynb b/notebook.ipynb index 3c11177..edca606 100644 --- a/notebook.ipynb +++ b/notebook.ipynb @@ -73,8 +73,8 @@ "data": { "text/plain": [ "{'id': 1,\n", - " 'run_number': 315257,\n", - " 'date': datetime.datetime(2021, 10, 25, 15, 37, 22, 290978, tzinfo=)}" + " 'run_number': 315258,\n", + " 'date': datetime.datetime(2021, 10, 26, 11, 37, 39, 211844, tzinfo=)}" ] }, "execution_count": 5, @@ -107,15 +107,15 @@ "text/plain": [ "{'id': 1,\n", " 'run_id': 1,\n", - " 'date': datetime.datetime(2021, 10, 25, 15, 37, 22, 710167, tzinfo=),\n", - " 'primary_dataset': 'ZeroBias',\n", + " 'date': datetime.datetime(2021, 10, 26, 11, 37, 48, 578496, tzinfo=),\n", + " 'primary_dataset': 'EGamma',\n", " 'path': 'TrackParameters/generalTracks/GeneralProperties',\n", " 'title': 'AbsDistanceOfClosestApproachToBS_GenTk',\n", - " 'entries': 34430387,\n", - " 'mean': 0.4408986607028722,\n", - " 'rms': 2.7660848356477787,\n", - " 'skewness': 3.1844432366039466,\n", - " 'kurtosis': 35.49796721670949}" + " 'entries': 786498,\n", + " 'mean': 0.4792563974038401,\n", + " 'rms': 2.9386499655611944,\n", + " 'skewness': 2.917119692221068,\n", + " 'kurtosis': 29.826709167508028}" ] }, "execution_count": 7, @@ -198,71 +198,71 @@ " 0\n", " 1\n", " 1\n", - " 2021-10-25 15:37:22.710167+00:00\n", - " ZeroBias\n", + " 2021-10-26 11:37:48.578496+00:00\n", + " EGamma\n", " TrackParameters/generalTracks/GeneralProperties\n", " AbsDistanceOfClosestApproachToBS_GenTk\n", - " 34430387\n", - " 0.440899\n", - " 2.766085\n", - " 3.184443\n", - " 35.497967\n", + " 786498\n", + " 0.479256\n", + " 2.938650\n", + " 2.917120\n", + " 29.826709\n", " \n", " \n", " 1\n", " 2\n", " 1\n", - " 2021-10-25 15:37:22.710269+00:00\n", - " ZeroBias\n", + " 2021-10-26 11:37:48.578595+00:00\n", + " EGamma\n", " TrackParameters/generalTracks/GeneralProperties\n", " Chi2Prob_GenTk\n", - " 34430387\n", - " 0.497623\n", - " 0.322158\n", - " -0.089739\n", - " -1.339663\n", + " 786498\n", + " 0.492466\n", + " 0.322329\n", + " -0.069946\n", + " -1.345512\n", " \n", " \n", " 2\n", " 3\n", " 1\n", - " 2021-10-25 15:37:22.710310+00:00\n", - " ZeroBias\n", + " 2021-10-26 11:37:48.578635+00:00\n", + " EGamma\n", " TrackParameters/generalTracks/GeneralProperties\n", " Chi2_GenTk\n", - " 34430387\n", - " 18.862586\n", - " 66.964365\n", - " 0.046712\n", - " -2.909550\n", + " 786498\n", + " 19.083402\n", + " 68.698831\n", + " 0.043568\n", + " -2.917017\n", " \n", " \n", " 3\n", " 4\n", " 1\n", - " 2021-10-25 15:37:22.710343+00:00\n", - " ZeroBias\n", + " 2021-10-26 11:37:48.578670+00:00\n", + " EGamma\n", " TrackParameters/generalTracks/GeneralProperties\n", " Chi2oNDFVsEta_ImpactPoint_GenTk\n", - " 34421103\n", - " -0.103396\n", - " 1.586303\n", - " 0.264605\n", - " -0.090844\n", + " 786288\n", + " -0.106133\n", + " 1.589274\n", + " 0.272494\n", + " -0.112022\n", " \n", " \n", " 4\n", " 5\n", " 1\n", - " 2021-10-25 15:37:22.710387+00:00\n", - " ZeroBias\n", + " 2021-10-26 11:37:48.578703+00:00\n", + " EGamma\n", " TrackParameters/generalTracks/GeneralProperties\n", " Chi2oNDFVsNHits_ImpactPoint_GenTk\n", - " 34421103\n", - " 11.321803\n", - " 6.708382\n", - " 38.499120\n", - " 175.574616\n", + " 786288\n", + " 11.331133\n", + " 6.684785\n", + " 37.555632\n", + " 171.534894\n", " \n", " \n", "\n", @@ -270,11 +270,11 @@ ], "text/plain": [ " id run_id date primary_dataset \\\n", - "0 1 1 2021-10-25 15:37:22.710167+00:00 ZeroBias \n", - "1 2 1 2021-10-25 15:37:22.710269+00:00 ZeroBias \n", - "2 3 1 2021-10-25 15:37:22.710310+00:00 ZeroBias \n", - "3 4 1 2021-10-25 15:37:22.710343+00:00 ZeroBias \n", - "4 5 1 2021-10-25 15:37:22.710387+00:00 ZeroBias \n", + "0 1 1 2021-10-26 11:37:48.578496+00:00 EGamma \n", + "1 2 1 2021-10-26 11:37:48.578595+00:00 EGamma \n", + "2 3 1 2021-10-26 11:37:48.578635+00:00 EGamma \n", + "3 4 1 2021-10-26 11:37:48.578670+00:00 EGamma \n", + "4 5 1 2021-10-26 11:37:48.578703+00:00 EGamma \n", "\n", " path \\\n", "0 TrackParameters/generalTracks/GeneralProperties \n", @@ -283,19 +283,19 @@ "3 TrackParameters/generalTracks/GeneralProperties \n", "4 TrackParameters/generalTracks/GeneralProperties \n", "\n", - " title entries mean rms \\\n", - "0 AbsDistanceOfClosestApproachToBS_GenTk 34430387 0.440899 2.766085 \n", - "1 Chi2Prob_GenTk 34430387 0.497623 0.322158 \n", - "2 Chi2_GenTk 34430387 18.862586 66.964365 \n", - "3 Chi2oNDFVsEta_ImpactPoint_GenTk 34421103 -0.103396 1.586303 \n", - "4 Chi2oNDFVsNHits_ImpactPoint_GenTk 34421103 11.321803 6.708382 \n", + " title entries mean rms \\\n", + "0 AbsDistanceOfClosestApproachToBS_GenTk 786498 0.479256 2.938650 \n", + "1 Chi2Prob_GenTk 786498 0.492466 0.322329 \n", + "2 Chi2_GenTk 786498 19.083402 68.698831 \n", + "3 Chi2oNDFVsEta_ImpactPoint_GenTk 786288 -0.106133 1.589274 \n", + "4 Chi2oNDFVsNHits_ImpactPoint_GenTk 786288 11.331133 6.684785 \n", "\n", " skewness kurtosis \n", - "0 3.184443 35.497967 \n", - "1 -0.089739 -1.339663 \n", - "2 0.046712 -2.909550 \n", - "3 0.264605 -0.090844 \n", - "4 38.499120 175.574616 " + "0 2.917120 29.826709 \n", + "1 -0.069946 -1.345512 \n", + "2 0.043568 -2.917017 \n", + "3 0.272494 -0.112022 \n", + "4 37.555632 171.534894 " ] }, "execution_count": 9, @@ -379,38 +379,38 @@ " \n", " \n", " 1\n", - " ZeroBias\n", + " EGamma\n", " 1\n", " Chi2Prob_GenTk\n", - " 0.497623\n", + " 0.492466\n", " \n", " \n", " 2\n", - " ZeroBias\n", + " EGamma\n", " 1\n", " Chi2_GenTk\n", - " 18.862586\n", + " 19.083402\n", " \n", " \n", - " 97\n", + " 84\n", " EGamma\n", " 2\n", " Chi2Prob_GenTk\n", - " 0.492466\n", + " 0.486288\n", " \n", " \n", - " 98\n", + " 85\n", " EGamma\n", " 2\n", " Chi2_GenTk\n", - " 19.083402\n", + " 19.146579\n", " \n", " \n", - " 180\n", + " 167\n", " EGamma\n", " 3\n", " Chi2Prob_GenTk\n", - " 0.486288\n", + " 0.499791\n", " \n", " \n", " ...\n", @@ -420,60 +420,60 @@ " ...\n", " \n", " \n", - " 57534\n", - " JetHT\n", - " 151\n", + " 17182\n", + " ZeroBias\n", + " 49\n", " Chi2_GenTk\n", - " 19.614357\n", + " 19.138309\n", " \n", " \n", - " 57616\n", - " JetHT\n", - " 153\n", + " 17277\n", + " ZeroBias\n", + " 50\n", " Chi2Prob_GenTk\n", - " 0.473647\n", + " 0.505047\n", " \n", " \n", - " 57617\n", - " JetHT\n", - " 153\n", + " 17278\n", + " ZeroBias\n", + " 50\n", " Chi2_GenTk\n", - " 19.476241\n", + " 19.139480\n", " \n", " \n", - " 57699\n", - " JetHT\n", - " 154\n", + " 17373\n", + " ZeroBias\n", + " 51\n", " Chi2Prob_GenTk\n", - " 0.442081\n", + " 0.516412\n", " \n", " \n", - " 57700\n", - " JetHT\n", - " 154\n", + " 17374\n", + " ZeroBias\n", + " 51\n", " Chi2_GenTk\n", - " 20.097355\n", + " 19.086274\n", " \n", " \n", "\n", - "

1392 rows × 4 columns

\n", + "

404 rows × 4 columns

\n", "" ], "text/plain": [ " primary_dataset run_id title mean\n", - "1 ZeroBias 1 Chi2Prob_GenTk 0.497623\n", - "2 ZeroBias 1 Chi2_GenTk 18.862586\n", - "97 EGamma 2 Chi2Prob_GenTk 0.492466\n", - "98 EGamma 2 Chi2_GenTk 19.083402\n", - "180 EGamma 3 Chi2Prob_GenTk 0.486288\n", + "1 EGamma 1 Chi2Prob_GenTk 0.492466\n", + "2 EGamma 1 Chi2_GenTk 19.083402\n", + "84 EGamma 2 Chi2Prob_GenTk 0.486288\n", + "85 EGamma 2 Chi2_GenTk 19.146579\n", + "167 EGamma 3 Chi2Prob_GenTk 0.499791\n", "... ... ... ... ...\n", - "57534 JetHT 151 Chi2_GenTk 19.614357\n", - "57616 JetHT 153 Chi2Prob_GenTk 0.473647\n", - "57617 JetHT 153 Chi2_GenTk 19.476241\n", - "57699 JetHT 154 Chi2Prob_GenTk 0.442081\n", - "57700 JetHT 154 Chi2_GenTk 20.097355\n", + "17182 ZeroBias 49 Chi2_GenTk 19.138309\n", + "17277 ZeroBias 50 Chi2Prob_GenTk 0.505047\n", + "17278 ZeroBias 50 Chi2_GenTk 19.139480\n", + "17373 ZeroBias 51 Chi2Prob_GenTk 0.516412\n", + "17374 ZeroBias 51 Chi2_GenTk 19.086274\n", "\n", - "[1392 rows x 4 columns]" + "[404 rows x 4 columns]" ] }, "execution_count": 12, @@ -532,35 +532,35 @@ " \n", " 0\n", " EGamma\n", - " 2\n", + " 1\n", " 0.492466\n", " 19.083402\n", " \n", " \n", " 1\n", " EGamma\n", - " 3\n", + " 2\n", " 0.486288\n", " 19.146579\n", " \n", " \n", " 2\n", " EGamma\n", - " 4\n", + " 3\n", " 0.499791\n", " 19.027085\n", " \n", " \n", " 3\n", " EGamma\n", - " 5\n", + " 4\n", " 0.489095\n", " 19.103232\n", " \n", " \n", " 4\n", " EGamma\n", - " 6\n", + " 5\n", " 0.483947\n", " 19.190387\n", " \n", @@ -572,60 +572,60 @@ " ...\n", " \n", " \n", - " 691\n", - " JetHT\n", - " 154\n", - " 0.442081\n", - " 20.097355\n", + " 197\n", + " ZeroBias\n", + " 51\n", + " 0.516412\n", + " 19.086274\n", " \n", " \n", - " 692\n", - " JetHT\n", - " 542\n", - " 0.495271\n", - " 19.129277\n", + " 198\n", + " ZeroBias\n", + " 52\n", + " 0.497623\n", + " 18.862586\n", " \n", " \n", - " 693\n", - " JetHT\n", - " 543\n", - " 0.469204\n", - " 19.599869\n", + " 199\n", + " ZeroBias\n", + " 53\n", + " 0.502357\n", + " 18.838694\n", " \n", " \n", - " 694\n", - " JetHT\n", - " 544\n", - " 0.478707\n", - " 19.342482\n", + " 200\n", + " ZeroBias\n", + " 54\n", + " 0.479684\n", + " 19.094168\n", " \n", " \n", - " 695\n", + " 201\n", " ZeroBias\n", - " 1\n", - " 0.497623\n", - " 18.862586\n", + " 55\n", + " 0.487128\n", + " 18.958929\n", " \n", " \n", "\n", - "

696 rows × 4 columns

\n", + "

202 rows × 4 columns

\n", "" ], "text/plain": [ "title primary_dataset run_id Chi2Prob_GenTk Chi2_GenTk\n", - "0 EGamma 2 0.492466 19.083402\n", - "1 EGamma 3 0.486288 19.146579\n", - "2 EGamma 4 0.499791 19.027085\n", - "3 EGamma 5 0.489095 19.103232\n", - "4 EGamma 6 0.483947 19.190387\n", + "0 EGamma 1 0.492466 19.083402\n", + "1 EGamma 2 0.486288 19.146579\n", + "2 EGamma 3 0.499791 19.027085\n", + "3 EGamma 4 0.489095 19.103232\n", + "4 EGamma 5 0.483947 19.190387\n", ".. ... ... ... ...\n", - "691 JetHT 154 0.442081 20.097355\n", - "692 JetHT 542 0.495271 19.129277\n", - "693 JetHT 543 0.469204 19.599869\n", - "694 JetHT 544 0.478707 19.342482\n", - "695 ZeroBias 1 0.497623 18.862586\n", + "197 ZeroBias 51 0.516412 19.086274\n", + "198 ZeroBias 52 0.497623 18.862586\n", + "199 ZeroBias 53 0.502357 18.838694\n", + "200 ZeroBias 54 0.479684 19.094168\n", + "201 ZeroBias 55 0.487128 18.958929\n", "\n", - "[696 rows x 4 columns]" + "[202 rows x 4 columns]" ] }, "execution_count": 14, @@ -646,7 +646,7 @@ { "data": { "text/plain": [ - "array(['EGamma', 'JetHT', 'ZeroBias'], dtype=object)" + "array(['EGamma', 'JetHT', 'SingleMuon', 'ZeroBias'], dtype=object)" ] }, "execution_count": 15, @@ -703,9 +703,13 @@ "metadata": {}, "outputs": [], "source": [ - "EGamma = alt.Chart(source[source['primary_dataset']=='EGamma']).mark_point().encode(\n", - " x='Chi2Prob_GenTk:Q',\n", - " y='Chi2_GenTk:Q',\n", + "EGamma = alt.Chart(source[source['primary_dataset']=='EGamma']).mark_point(clip=True).encode(\n", + " alt.X('Chi2Prob_GenTk:Q',\n", + " scale=alt.Scale(domain=(0.4, 0.6))\n", + " ),\n", + " alt.Y('Chi2_GenTk:Q',\n", + " scale=alt.Scale(domain=(16, 22))\n", + " ),\n", " color=alt.condition(brush_EGamma|brush_JetHT|brush_ZeroBias, 'primary_dataset', alt.value('lightgray'))\n", ").add_selection(\n", " brush_EGamma\n", @@ -719,9 +723,13 @@ "metadata": {}, "outputs": [], "source": [ - "JetHT = alt.Chart(source[source['primary_dataset']=='JetHT']).mark_point().encode(\n", - " x='Chi2Prob_GenTk:Q',\n", - " y='Chi2_GenTk:Q',\n", + "JetHT = alt.Chart(source[source['primary_dataset']=='JetHT']).mark_point(clip=True).encode(\n", + " alt.X('Chi2Prob_GenTk:Q',\n", + " scale=alt.Scale(domain=(0.4, 0.6))\n", + " ),\n", + " alt.Y('Chi2_GenTk:Q',\n", + " scale=alt.Scale(domain=(16, 22))\n", + " ),\n", " color=alt.condition(brush_EGamma|brush_JetHT|brush_ZeroBias, 'primary_dataset', alt.value('lightgray'))\n", ").add_selection(\n", " brush_JetHT\n", @@ -735,9 +743,13 @@ "metadata": {}, "outputs": [], "source": [ - "ZeroBias = alt.Chart(source[source['primary_dataset']=='ZeroBias']).mark_point().encode(\n", - " x='Chi2Prob_GenTk:Q',\n", - " y='Chi2_GenTk:Q',\n", + "ZeroBias = alt.Chart(source[source['primary_dataset']=='ZeroBias']).mark_point(clip=True).encode(\n", + " alt.X('Chi2Prob_GenTk:Q',\n", + " scale=alt.Scale(domain=(0.4, 0.6))\n", + " ),\n", + " alt.Y('Chi2_GenTk:Q',\n", + " scale=alt.Scale(domain=(16, 22))\n", + " ),\n", " color=alt.condition(brush_EGamma|brush_JetHT|brush_ZeroBias, 'primary_dataset', alt.value('lightgray'))\n", ").add_selection(\n", " brush_ZeroBias\n", @@ -754,12 +766,12 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", "" ], "text/plain": [ @@ -824,6 +836,14 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a96dbb5", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/run_histos/templates/run_histos/main.html b/run_histos/templates/run_histos/main.html index 8215f50..05910f1 100644 --- a/run_histos/templates/run_histos/main.html +++ b/run_histos/templates/run_histos/main.html @@ -58,7 +58,7 @@

Choosing Dataset / Variable / Plot type


-

Histogram

+

Plot

@@ -68,8 +68,4 @@

Histogram

vegaEmbed("#vis", spec) -
- -{{ df | safe }} - {% endblock content %} diff --git a/run_histos/urls.py b/run_histos/urls.py index ead9ba6..8f9c1be 100644 --- a/run_histos/urls.py +++ b/run_histos/urls.py @@ -1,9 +1,9 @@ from django.urls import path -from .views import chart_select_view, chart_view_altair +from .views import run_histos_view, chart_view_altair app_name = 'run_histos' urlpatterns = [ - path('', chart_select_view, name='main-runhistos-view'), + path('', run_histos_view, name='main-runhistos-view'), path('altair/', chart_view_altair, name='altair-view') ] diff --git a/run_histos/views.py b/run_histos/views.py index b117084..b1c27df 100644 --- a/run_histos/views.py +++ b/run_histos/views.py @@ -7,46 +7,72 @@ import pandas as pd import altair as alt -from vega_datasets import data - # Create your views here. -def chart_select_view(request): +def run_histos_view(request): error_message = None + dataset = None + variable = None + plot_type = None + df = None + chart = {} # objects.all().values() provides a dictionary while objects.all().values_list() provides a tuple runs_df = pd.DataFrame(Run.objects.all().values()) runhistos_df = pd.DataFrame(RunHisto.objects.all().values()) if runhistos_df.shape[0] > 0: - df = pd.merge(runs_df, runhistos_df, left_on='id', right_on='run_id').drop(['id_x', 'id_y', 'run_id', 'date_x', 'date_y'], axis=1).head(20) + df = pd.merge(runs_df, runhistos_df, left_on='id', right_on='run_id').drop(['id_x', 'id_y', 'run_id', 'date_x', 'date_y'], axis=1) if request.method == 'POST': - print(f"request.POST is {request.POST}") dataset = request.POST['dataset'] variable = request.POST['variable'] plot_type = request.POST['plot_type'] print(f"dataset: {dataset} / variable: {variable} / plot_type: {plot_type}") - else: - error_message = "No records in the database" + #df = df.query('primary_dataset.str.lower()=="zerobias" & title.str.lower()=="chi2prob_gentk"') + df = df.query('primary_dataset.str.lower()==@dataset & title.str.lower()==@variable') - chart = alt.Chart(runhistos_df.head(100)).mark_bar().encode( - x='mean', - ).to_json(indent=None) + if plot_type == 'histogram': + chart = alt.Chart(df).mark_bar().encode( + alt.X("mean", bin=True), + y='count()', + ).to_json(indent=None) + elif plot_type == 'time serie': + chart = alt.Chart(df).mark_circle(size=60).encode( + alt.X('run_number', + scale=alt.Scale(domain=(315000, 316000)) # shouldn't be hardcoded + ), + y='mean', + tooltip=['run_number', 'mean'] + ).to_json(indent=None) + else: + print("No chart type was selected.") + + else: + error_message = "No runhistos in the database" context = { 'error_message': error_message, - 'df': df.to_html(), + 'df': df, 'chart' : chart, } return render(request, 'run_histos/main.html', context) def chart_view_altair(request): + + chart = {} + runhistos_df = pd.DataFrame(RunHisto.objects.all().values()).head(100) - chart_obj = alt.Chart(runhistos_df).mark_bar().encode( - x='mean', - ).to_json(indent=None) + + if runhistos_df.shape[0] > 0: + chart_obj = alt.Chart(runhistos_df).mark_bar().encode( + x='mean', + ).to_json(indent=None) + + else: + print("No runshistos in the database") + return JsonResponse(chart_obj) diff --git a/runs/templates/runs/main.html b/runs/templates/runs/main.html index a3a4c6d..e253505 100644 --- a/runs/templates/runs/main.html +++ b/runs/templates/runs/main.html @@ -3,5 +3,13 @@ {% block title %} Runs {% endblock title %} {% block content %} + +{% if error_message %} +{{ error_message }} +{% endif %} + +{% if runs %} {{ runs | safe }} +{% endif %} + {% endblock content %} diff --git a/runs/templates/runs/run.html b/runs/templates/runs/run.html new file mode 100644 index 0000000..380acae --- /dev/null +++ b/runs/templates/runs/run.html @@ -0,0 +1,9 @@ +{% extends "base.html" %} + +{% block title %} Run view {% endblock title %} + +{% block content %} + +Specific view of run number ... + +{% endblock content %} diff --git a/runs/urls.py b/runs/urls.py index 779c8be..8080340 100644 --- a/runs/urls.py +++ b/runs/urls.py @@ -1,8 +1,9 @@ from django.urls import path -from .views import chart_select_view +from .views import runs_view, run_view app_name = 'runs' urlpatterns = [ - path('', chart_select_view, name='main-runs-view') + path('', runs_view, name='main-runs-view'), + path('run/', run_view, name='main-run-view') ] diff --git a/runs/views.py b/runs/views.py index 9fd72cf..a1ce08c 100644 --- a/runs/views.py +++ b/runs/views.py @@ -5,10 +5,27 @@ # Create your views here. -def chart_select_view(request): +def runs_view(request): + + error_message = None + df = None + # objects.all().values() provides a dictionary while objects.all().values_list() provides a tuple - runs_df = pd.DataFrame(Run.objects.all().values()).drop(['id'], axis=1) + runs_df = pd.DataFrame(Run.objects.all().values()) + + if runs_df.shape[0] > 0: + df = runs_df.drop(['id'], axis=1) + + else: + error_message = "No runs in the database" + context = { - 'runs': runs_df.to_html(), + 'error_message': error_message, + 'runs': df, } return render(request, 'runs/main.html', context) + + +def run_view(request): + return render(request, 'runs/run.html') + diff --git a/scripts/step2_extract_run_histos.sh b/scripts/step2_extract_run_histos.sh index 9810646..18a461c 100644 --- a/scripts/step2_extract_run_histos.sh +++ b/scripts/step2_extract_run_histos.sh @@ -5,12 +5,12 @@ cd .. # testing one file #./manage.py extract_run_histos /eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/ZeroBias_315257_UL2018.csv -FILES="/eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/*" +FILES="/eos/project/c/cmsml4dc/ML_2020/PerRun_UL2018_Data/*_315*" # extracting from all files for f in $FILES do echo "Processing $f file..." - ./python manage.py extract_run_histos $f + ./manage.py extract_run_histos $f done