Merge 5d3dec5 into cb471c3

uzh-bf · Dec 6, 2024 · eb0b369 · eb0b369
2 parents cb471c3 + 5d3dec5
commit eb0b369
Show file tree

Hide file tree

Showing 10 changed files with 372 additions and 14 deletions.
diff --git a/apps/analytics/src/modules/__init__.py b/apps/analytics/src/modules/__init__.py
@@ -1,14 +1,6 @@
-from .participant_analytics import compute_correctness, get_participant_responses
-from .aggregated_analytics import compute_aggregated_analytics
-from .participant_course_analytics import (
-    get_running_past_courses,
-    get_active_weeks,
-    compute_participant_activity,
-    save_participant_course_analytics,
-)
-from .aggregated_course_analytics import compute_weekday_activity
-from .participant_performance import (
-    compute_response_error_rates,
-    compute_performance_levels,
-    save_participant_performance,
-)
+from .participant_analytics import *
+from .aggregated_analytics import *
+from .participant_course_analytics import *
+from .aggregated_course_analytics import *
+from .participant_performance import *
+from .instance_activity_performance import *
diff --git a/apps/analytics/src/modules/instance_activity_performance/__init__.py b/apps/analytics/src/modules/instance_activity_performance/__init__.py
@@ -0,0 +1,5 @@
+from .get_course_activities import get_course_activities
+from .compute_instance_performance import compute_instance_performance
+from .agg_activity_performance import agg_activity_performance
+from .save_instance_performances import save_instance_performances
+from .save_activity_performance import save_activity_performance
diff --git a/apps/analytics/src/modules/instance_activity_performance/agg_activity_performance.py b/apps/analytics/src/modules/instance_activity_performance/agg_activity_performance.py
@@ -0,0 +1,20 @@
+def agg_activity_performance(df_instance_performance):
+    """Aggregate per-instance performance into one activity-level result.
+
+    Args:
+        df_instance_performance: DataFrame with one row per instance, holding
+            an "instanceId" column plus the metric columns to average.
+
+    Returns:
+        A pandas Series with the mean of every metric column across all
+        instances. A Series rather than a dict is returned so the result
+        supports attribute access such as ``result.totalErrorRate``.
+
+    Raises:
+        KeyError: If the frame has no "instanceId" column.
+    """
+    # instanceId is an identifier, not a metric: exclude it before averaging so
+    # that non-numeric ids cannot break the mean and the input stays untouched
+    metrics = df_instance_performance.drop(columns="instanceId")
+
+    return metrics.mean()
diff --git a/apps/analytics/src/modules/instance_activity_performance/compute_instance_performance.py b/apps/analytics/src/modules/instance_activity_performance/compute_instance_performance.py
@@ -0,0 +1,97 @@
+import pandas as pd
+
+
+def _correctness_rates(labels, prefix, num_responses):
+    """Return the WRONG/PARTIAL/CORRECT shares of ``labels`` as rate fields.
+
+    The labels are tallied once; each share is the label count divided by
+    ``num_responses``, and a label that never occurs yields a rate of 0.
+    """
+    counts = labels.value_counts()
+    return {
+        f"{prefix}ErrorRate": counts.get("WRONG", 0) / num_responses,
+        f"{prefix}PartialRate": counts.get("PARTIAL", 0) / num_responses,
+        f"{prefix}CorrectRate": counts.get("CORRECT", 0) / num_responses,
+    }
+
+
+def compute_instance_performance(db, activity, total_only=False):
+    """Compute correctness rates for every element instance of an activity.
+
+    Args:
+        db: Database client. Not used by this function; kept so that existing
+            callers do not need to change.
+        activity: Mapping with a "stacks" list. Every stack holds an
+            "elements" list and every element provides "id" and "responses".
+        total_only: If True, only the total* rates are computed and the
+            first*/last* columns are not filled for any row.
+
+    Returns:
+        DataFrame with one row per instance that has at least one response.
+        Instances without responses are skipped, so the frame may be empty.
+    """
+    # initialize dataframe for performance tracking
+    df_instance_performance = pd.DataFrame(
+        columns=[
+            "instanceId",
+            "responseCount",
+            "firstErrorRate",
+            "firstPartialRate",
+            "firstCorrectRate",
+            "lastErrorRate",
+            "lastPartialRate",
+            "lastCorrectRate",
+            "totalErrorRate",
+            "totalPartialRate",
+            "totalCorrectRate",
+        ]
+    )
+
+    for stack in activity["stacks"]:
+        for instance in stack["elements"]:
+            df_responses = pd.DataFrame(instance["responses"])
+
+            if df_responses.empty:
+                continue
+
+            # count number of responses
+            num_responses = len(df_responses)
+
+            # total rates: share of wrong / partial / correct trials within each
+            # response, averaged over all responses of the instance
+            # NOTE(review): a trialsCount of 0 would produce inf/NaN rates -
+            # confirm that stored responses always have at least one trial
+            trials = df_responses["trialsCount"]
+            instance_performance = {
+                "instanceId": instance["id"],
+                "responseCount": num_responses,
+                "totalErrorRate": (df_responses["wrongCount"] / trials).mean(),
+                "totalPartialRate": (
+                    df_responses["partialCorrectCount"] / trials
+                ).mean(),
+                "totalCorrectRate": (df_responses["correctCount"] / trials).mean(),
+            }
+
+            if not total_only:
+                # correctness rates of the first and the last response
+                instance_performance.update(
+                    _correctness_rates(
+                        df_responses["firstResponseCorrectness"],
+                        "first",
+                        num_responses,
+                    )
+                )
+                instance_performance.update(
+                    _correctness_rates(
+                        df_responses["lastResponseCorrectness"],
+                        "last",
+                        num_responses,
+                    )
+                )
+
+            # append instance values to dataframe
+            df_instance_performance.loc[len(df_instance_performance)] = (
+                instance_performance
+            )
+
+    return df_instance_performance
diff --git a/apps/analytics/src/modules/instance_activity_performance/get_course_activities.py b/apps/analytics/src/modules/instance_activity_performance/get_course_activities.py
@@ -0,0 +1,30 @@
+def get_course_activities(db, course_id):
+    """Load all practice quizzes and microlearnings of a course as dicts.
+
+    Args:
+        db: Database client exposing ``practicequiz`` and ``microlearning``
+            delegates with a ``find_many`` method.
+        course_id: Value matched against the ``courseId`` of each activity.
+
+    Returns:
+        Tuple ``(pqs, mls)`` of two lists holding the ``.dict()`` form of
+        every practice quiz and every microlearning, each including its
+        stacks, their elements and the elements' responses.
+    """
+    # both queries use the same course filter and the same nested include
+    course_filter = {"courseId": course_id}
+    nested_include = {
+        "stacks": {"include": {"elements": {"include": {"responses": True}}}}
+    }
+
+    practice_quizzes = db.practicequiz.find_many(
+        where=course_filter, include=nested_include
+    )
+    pqs = [practice_quiz.dict() for practice_quiz in practice_quizzes]
+
+    microlearnings = db.microlearning.find_many(
+        where=course_filter, include=nested_include
+    )
+    mls = [microlearning.dict() for microlearning in microlearnings]
+
+    return pqs, mls
diff --git a/apps/analytics/src/modules/instance_activity_performance/save_activity_performance.py b/apps/analytics/src/modules/instance_activity_performance/save_activity_performance.py
@@ -0,0 +1,57 @@
+def save_activity_performance(
+    db, activity_performance, course_id, practice_quiz_id=None, microlearning_id=None
+):
+    """Create or update the stored performance record of one activity.
+
+    Args:
+        db: Database client exposing an ``activityperformance`` delegate with
+            an ``upsert`` method.
+        activity_performance: Object offering the rate metrics as attributes
+            (total* always, first*/last* when a practice quiz id is given).
+        course_id: Id of the course the record is connected to on creation.
+        practice_quiz_id: Id of the practice quiz; takes precedence when both
+            ids are given.
+        microlearning_id: Id of the microlearning; only the total* rates are
+            stored for it.
+
+    Raises:
+        ValueError: If neither practice_quiz_id nor microlearning_id is given.
+    """
+    # total rates are stored for every kind of activity
+    update_values = {
+        metric: getattr(activity_performance, metric)
+        for metric in ("totalErrorRate", "totalPartialRate", "totalCorrectRate")
+    }
+
+    if practice_quiz_id is not None:
+        # practice quizzes additionally carry first/last response rates
+        for metric in (
+            "firstErrorRate",
+            "firstPartialRate",
+            "firstCorrectRate",
+            "lastErrorRate",
+            "lastPartialRate",
+            "lastCorrectRate",
+        ):
+            update_values[metric] = getattr(activity_performance, metric)
+        relation, foreign_key = "practiceQuiz", "practiceQuizId"
+        activity_id = practice_quiz_id
+    elif microlearning_id is not None:
+        relation, foreign_key = "microLearning", "microLearningId"
+        activity_id = microlearning_id
+    else:
+        raise ValueError(
+            "Either practice_quiz_id or microlearning_id must be provided for activity performance creation/update"
+        )
+
+    # relational links are only part of the payload used on creation
+    create_values = {
+        **update_values,
+        relation: {"connect": {"id": activity_id}},
+        "course": {"connect": {"id": course_id}},
+    }
+
+    db.activityperformance.upsert(
+        where={foreign_key: activity_id},
+        data={"create": create_values, "update": update_values},
+    )
diff --git a/apps/analytics/src/modules/instance_activity_performance/save_instance_performances.py b/apps/analytics/src/modules/instance_activity_performance/save_instance_performances.py
@@ -0,0 +1,46 @@
+def save_instance_performances(
+    db, df_instance_performance, course_id, total_only=False
+):
+    """Create or update one stored performance record per instance row.
+
+    Args:
+        db: Database client exposing an ``instanceperformance`` delegate with
+            an ``upsert`` method.
+        df_instance_performance: DataFrame with an "instanceId" column and the
+            metric columns that are written for each row.
+        course_id: Id of the course each record is connected to on creation.
+        total_only: If True, the first*/last* rate columns are not written.
+    """
+    metric_columns = [
+        "responseCount",
+        "totalErrorRate",
+        "totalPartialRate",
+        "totalCorrectRate",
+    ]
+
+    # only store first and last response rates if applicable
+    if not total_only:
+        metric_columns += [
+            "firstErrorRate",
+            "firstPartialRate",
+            "firstCorrectRate",
+            "lastErrorRate",
+            "lastPartialRate",
+            "lastCorrectRate",
+        ]
+
+    for _, record in df_instance_performance.iterrows():
+        update_values = {column: record[column] for column in metric_columns}
+        instance_id = record["instanceId"]
+
+        # relational links are only part of the payload used on creation
+        create_values = {
+            **update_values,
+            "instance": {"connect": {"id": instance_id}},
+            "course": {"connect": {"id": course_id}},
+        }
+
+        db.instanceperformance.upsert(
+            where={"instanceId": instance_id},
+            data={"create": create_values, "update": update_values},
+        )
diff --git a/apps/analytics/src/notebooks/instance_activity_performance.ipynb b/apps/analytics/src/notebooks/instance_activity_performance.ipynb
@@ -0,0 +1,144 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Preparation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "from datetime import datetime\n",
+    "from prisma import Prisma\n",
+    "import pandas as pd\n",
+    "import sys\n",
+    "\n",
+    "# set the python path correctly for module imports to work\n",
+    "sys.path.append(\"../../\")\n",
+    "\n",
+    "from src.modules.participant_course_analytics.get_running_past_courses import (\n",
+    "    get_running_past_courses,\n",
+    ")\n",
+    "from src.modules.instance_activity_performance.get_course_activities import get_course_activities\n",
+    "from src.modules.instance_activity_performance.compute_instance_performance import compute_instance_performance\n",
+    "from src.modules.instance_activity_performance.agg_activity_performance import agg_activity_performance\n",
+    "from src.modules.instance_activity_performance.save_instance_performances import save_instance_performances\n",
+    "from src.modules.instance_activity_performance.save_activity_performance import save_activity_performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "db = Prisma()\n",
+    "\n",
+    "# set the environment variable DATABASE_URL to the connection string of your database\n",
+    "os.environ[\"DATABASE_URL\"] = \"postgresql://klicker:klicker@localhost:5432/klicker-prod\"\n",
+    "\n",
+    "db.connect()\n",
+    "\n",
+    "# Script settings\n",
+    "verbose = False"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compute Instance and Activity Performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fetch all courses from the database\n",
+    "df_courses = get_running_past_courses(db)\n",
+    "\n",
+    "# Iterate over the course and fetch all question responses linked to it\n",
+    "for idx, course in df_courses.iterrows():\n",
+    "    course_id = course[\"id\"]\n",
+    "    print(f\"Processing course\", idx, \"of\", len(df_courses), \"with id\", course_id)\n",
+    "\n",
+    "    # fetch all practice quizzes and microlearnings linked to the course\n",
+    "    pqs, mls = get_course_activities(db, course_id)\n",
+    "\n",
+    "    for quiz in pqs:\n",
+    "        # compute instance performances\n",
+    "        df_instance_performance = compute_instance_performance(db, quiz)\n",
+    "\n",
+    "        # if no instances with values were found, skip the activity\n",
+    "        if df_instance_performance.empty:\n",
+    "            continue\n",
+    "\n",
+    "        # compute the activity performance by aggregating all instance performances\n",
+    "        activity_performance = agg_activity_performance(df_instance_performance)\n",
+    "\n",
+    "        # save instance performance data\n",
+    "        save_instance_performances(db, df_instance_performance, course_id)\n",
+    "\n",
+    "        # save activity performance data\n",
+    "        save_activity_performance(db, activity_performance, course_id, practice_quiz_id=quiz[\"id\"])\n",
+    "\n",
+    "    for ml in mls:\n",
+    "        # compute instance performances\n",
+    "        df_instance_performance = compute_instance_performance(db, ml, total_only=True)\n",
+    "\n",
+    "        # if no instances with values were found, skip the activity\n",
+    "        if df_instance_performance.empty:\n",
+    "            continue\n",
+    "\n",
+    "        # compute the activity performance by aggregating all instance performances\n",
+    "        activity_performance = agg_activity_performance(df_instance_performance)\n",
+    "\n",
+    "        # save instance performance data\n",
+    "        save_instance_performances(db, df_instance_performance, course_id, total_only=True)\n",
+    "\n",
+    "        # save activity performance data\n",
+    "        save_activity_performance(db, activity_performance, course_id, microlearning_id=ml[\"id\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Disconnect from the database\n",
+    "db.disconnect()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "analytics-fkWWeYLw-py3.12",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/...lytics_performance_progress/migration.sql → ...lytics_performance_progress/migration.sql b/...lytics_performance_progress/migration.sql → ...lytics_performance_progress/migration.sql
@@ -21,6 +21,7 @@ CREATE TABLE "ParticipantPerformance" (
 -- CreateTable
 CREATE TABLE "InstancePerformance" (
     "id" SERIAL NOT NULL,
+    "responseCount" INTEGER NOT NULL,
     "firstErrorRate" REAL,
     "firstPartialRate" REAL,
     "firstCorrectRate" REAL,