Skip to content

Commit

Permalink
enhance: add first and last correctness counts to course-wide analytics and extend database
Browse files Browse the repository at this point in the history
  • Loading branch information
sjschlapbach committed Aug 27, 2024
1 parent ca56973 commit c546071
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 43 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd


def aggregate_analytics(df_details, verbose=False):
def aggregate_analytics(df_details, df_course_responses=None):
# Aggregate the question response details for the participant and course level
df_analytics_counts = (
df_details.groupby(["participantId", "courseId"])
Expand Down Expand Up @@ -93,9 +93,53 @@ def aggregate_analytics(df_details, verbose=False):
}
)

df_course_analytics = None
if df_course_responses is not None:
# Count entries where firstResponseCorrectness is 'CORRECT', 'WRONG' and lastResponseCorrectness is 'CORRECT', 'WRONG' into separate columns - grouped by participantId and courseId
df_course_analytics = (
df_course_responses.groupby(["participantId", "courseId"])
.agg(
{
"firstResponseCorrectness": [
("correct", lambda x: (x == "CORRECT").sum()),
("wrong", lambda x: (x == "WRONG").sum()),
],
"lastResponseCorrectness": [
("correct", lambda x: (x == "CORRECT").sum()),
("wrong", lambda x: (x == "WRONG").sum()),
],
}
)
.reset_index()
)
df_course_analytics.columns = df_course_analytics.columns.map(
"_".join
).str.strip("_")
df_course_analytics = df_course_analytics.rename(
columns={
"firstResponseCorrectness_correct": "firstCorrectCount",
"firstResponseCorrectness_wrong": "firstWrongCount",
"lastResponseCorrectness_correct": "lastCorrectCount",
"lastResponseCorrectness_wrong": "lastWrongCount",
}
)

# Combine the analytics counts and correctness dataframes based on the unique participantId and courseId combinations
df_analytics = pd.merge(
df_analytics_counts, df_analytics_correctness, on=["participantId", "courseId"]
)
if df_course_analytics is None:
df_analytics = pd.merge(
df_analytics_counts,
df_analytics_correctness,
on=["participantId", "courseId"],
)
else:
df_analytics = pd.merge(
df_analytics_counts,
pd.merge(
df_analytics_correctness,
df_course_analytics,
on=["participantId", "courseId"],
),
on=["participantId", "courseId"],
)

return df_analytics
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ def compute_participant_analytics(
return

# Compute participant analytics (score/xp counts and correctness statistics)
df_analytics = aggregate_analytics(df_details, verbose)
if verbose:
df_analytics.head()
df_analytics = aggregate_analytics(df_details)

# Save the aggregated analytics into the database
save_participant_analytics(db, df_analytics, timestamp, analytics_type)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def compute_participant_course_analytics(db, df_courses, verbose=False):
"lte": course_end_date,
}
},
}
},
"questionResponses": True,
}
}
},
Expand All @@ -45,14 +46,32 @@ def compute_participant_course_analytics(db, df_courses, verbose=False):
participations_dict,
)
)
responses_dict = list(
map(lambda x: x["participant"]["questionResponses"], participations_dict)
)

details = [item for sublist in details_dict for item in sublist]
if len(details) == 0:
responses = [item for sublist in responses_dict for item in sublist]
if len(details) == 0 or len(responses) == 0:
courses_without_responses += 1
print("No detail responses found for course {}".format(course_id))
print(
"No detail responses or response entries found for course {}".format(
course_id
)
)
continue

# Create pandas dataframe containing all question response details
# Create pandas dataframe containing all question responses and details
df_details = pd.DataFrame(details)
df_responses = pd.DataFrame(responses)
df_responses = df_responses[
[
"courseId",
"participantId",
"firstResponseCorrectness",
"lastResponseCorrectness",
]
]

# Add the course start and end dates to the dataframe
df_details["course_start_date"] = course_start_date
Expand All @@ -71,9 +90,7 @@ def compute_participant_course_analytics(db, df_courses, verbose=False):
continue

# Compute participant analytics (score/xp counts and correctness statistics)
df_analytics = aggregate_analytics(df_details, verbose)
if verbose:
df_analytics.head()
df_analytics = aggregate_analytics(df_details, df_responses)

# Save the aggregated analytics into the database
end_curr_date = datetime.now().strftime("%Y-%m-%d") + "T23:59:59.999Z"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,30 +1,92 @@
from datetime import datetime


def save_participant_analytics(db, df_analytics, timestamp, analytics_type="DAILY"):
    """Upsert one ParticipantAnalytics entry per row of ``df_analytics``.

    Args:
        db: Prisma client exposing ``participantanalytics.upsert``.
        df_analytics: DataFrame with one row per participant/course combination,
            containing the aggregated count and correctness columns.
        timestamp: ISO timestamp string identifying the analytics window
            (ignored for ``analytics_type == "COURSE"``).
        analytics_type: "DAILY", "WEEKLY" or "MONTHLY" (time-windowed entries,
            created once and never updated) or "COURSE" (a single course-wide
            entry per participant/course, created or overwritten in place).

    Raises:
        ValueError: if ``analytics_type`` is not one of the supported values.
    """
    # Normalized computation date (midnight of the current day, UTC suffix).
    computedAt = datetime.now().strftime("%Y-%m-%d") + "T00:00:00.000Z"

    # Create daily / weekly / monthly analytics entries for all participants
    if analytics_type in ["DAILY", "WEEKLY", "MONTHLY"]:
        for _, row in df_analytics.iterrows():
            db.participantanalytics.upsert(
                where=_unique_key(analytics_type, row, timestamp),
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp,
                        "computedAt": computedAt,
                        **_base_metrics(row),
                        **_relations(row),
                    },
                    # Time-windowed entries are immutable once created.
                    "update": {},
                },
            )

    # Create or update course-wide analytics entries (should be unique for
    # participant / course combination)
    elif analytics_type == "COURSE":
        # Constant timestamp so the compound unique key collapses to one
        # entry per participant/course combination.
        timestamp_const = "1970-01-01T00:00:00.000Z"
        for _, row in df_analytics.iterrows():
            metrics = {**_base_metrics(row), **_course_metrics(row)}
            db.participantanalytics.upsert(
                where=_unique_key(analytics_type, row, timestamp_const),
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp_const,
                        "computedAt": computedAt,
                        **metrics,
                        **_relations(row),
                    },
                    # Course-wide entries are recomputed and overwritten.
                    "update": {
                        "timestamp": timestamp_const,
                        "computedAt": computedAt,
                        **metrics,
                    },
                },
            )

    else:
        raise ValueError("Unknown analytics type: {}".format(analytics_type))


def _unique_key(analytics_type, row, timestamp):
    # Compound unique key matching @@unique([type, courseId, participantId, timestamp]).
    return {
        "type_courseId_participantId_timestamp": {
            "type": analytics_type,
            "courseId": row["courseId"],
            "participantId": row["participantId"],
            "timestamp": timestamp,
        }
    }


def _base_metrics(row):
    # Count and correctness metrics shared by all analytics types.
    return {
        "trialsCount": row["trialsCount"],
        "responseCount": row["responseCount"],
        "totalScore": row["totalScore"],
        "totalPoints": row["totalPoints"],
        "totalXp": row["totalXp"],
        "meanCorrectCount": row["meanCorrectCount"],
        # NOTE: the dataframe column is named "meanPartialCount" while the
        # database field is "meanPartialCorrectCount" — mapping is intentional.
        "meanPartialCorrectCount": row["meanPartialCount"],
        "meanWrongCount": row["meanWrongCount"],
    }


def _course_metrics(row):
    # First/last response correctness counts — only computed course-wide.
    return {
        "firstCorrectCount": row["firstCorrectCount"],
        "firstWrongCount": row["firstWrongCount"],
        "lastCorrectCount": row["lastCorrectCount"],
        "lastWrongCount": row["lastWrongCount"],
    }


def _relations(row):
    # Prisma connect clauses linking the entry to participant and course.
    return {
        "participant": {"connect": {"id": row["participantId"]}},
        "course": {"connect": {"id": row["courseId"]}},
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
Warnings:
- Added the required column `updatedAt` to the `AggregatedAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `AggregatedCompetencyAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `AggregatedCourseAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `CompetencyAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `ParticipantAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `ParticipantCourseAnalytics` table without a default value. This is not possible if the table is not empty.
*/

-- Adds audit timestamp columns to all analytics tables:
--   createdAt  - row creation time, defaulted by the database
--   updatedAt  - no database default; presumably maintained client-side via
--                Prisma's @updatedAt (hence the warnings above) -- confirm
--   computedAt - (ParticipantAnalytics / AggregatedAnalytics only) the DATE
--                on which the analytics values were computed

-- AlterTable
ALTER TABLE "AggregatedAnalytics" ADD COLUMN "computedAt" DATE NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "AggregatedCompetencyAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "AggregatedCourseAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "CompetencyAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "ParticipantAnalytics" ADD COLUMN "computedAt" DATE NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "ParticipantCourseAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;
22 changes: 21 additions & 1 deletion packages/prisma/src/prisma/schema/analytics.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ model ParticipantAnalytics {
id Int @id @default(autoincrement())
type AnalyticsType
timestamp DateTime @db.Date
timestamp DateTime @db.Date
computedAt DateTime @db.Date @default(now())
// unsolvedQuestionsCount = AggregatedAnalytics.totalElementsAvailable - responseCount
trialsCount Int // total number of questions attempted
Expand Down Expand Up @@ -47,6 +48,9 @@ model ParticipantAnalytics {
course Course @relation(fields: [courseId], references: [id], onDelete: Cascade, onUpdate: Cascade)
courseId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([type, courseId, participantId, timestamp])
}

Expand All @@ -57,6 +61,7 @@ model AggregatedAnalytics {
// all quantities are defined as the values at the end of the selected timeframe
timestamp DateTime @db.Date
computedAt DateTime @db.Date @default(now())
responseCount Int
participantCount Int
totalScore Int
Expand All @@ -69,6 +74,9 @@ model AggregatedAnalytics {
course Course @relation(fields: [courseId], references: [id], onDelete: Cascade, onUpdate: Cascade)
courseId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([type, courseId, timestamp])
}

Expand All @@ -87,6 +95,9 @@ model CompetencyAnalytics {
participantAnalytics ParticipantAnalytics @relation(fields: [participantAnalyticsId], references: [id], onDelete: Cascade, onUpdate: Cascade)
participantAnalyticsId Int
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([competencyId, participantAnalyticsId])
}

Expand All @@ -105,6 +116,9 @@ model AggregatedCompetencyAnalytics {
aggregatedAnalytics AggregatedAnalytics @relation(fields: [aggregatedAnalyticsId], references: [id], onDelete: Cascade, onUpdate: Cascade)
aggregatedAnalyticsId Int
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([competencyId, aggregatedAnalyticsId])
}

Expand All @@ -123,6 +137,9 @@ model ParticipantCourseAnalytics {
participant Participant @relation(fields: [participantId], references: [id], onDelete: Cascade, onUpdate: Cascade)
participantId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([courseId, participantId])
}

Expand All @@ -140,6 +157,9 @@ model AggregatedCourseAnalytics {
course Course @relation(fields: [courseId], references: [id], onDelete: Cascade, onUpdate: Cascade)
courseId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
}

model CompetencyTree {
Expand Down

0 comments on commit c546071

Please sign in to comment.