
Commit

enhance(apps/analytics): add computation logic for participant course performance (#4390)
sjschlapbach authored Dec 5, 2024
1 parent 5f402d7 commit cb471c3
Showing 8 changed files with 262 additions and 0 deletions.
5 changes: 5 additions & 0 deletions apps/analytics/src/modules/__init__.py
@@ -7,3 +7,8 @@
    save_participant_course_analytics,
)
from .aggregated_course_analytics import compute_weekday_activity
from .participant_performance import (
    compute_response_error_rates,
    compute_performance_levels,
    save_participant_performance,
)
3 changes: 3 additions & 0 deletions apps/analytics/src/modules/participant_performance/__init__.py
@@ -0,0 +1,3 @@
from .compute_response_error_rates import compute_response_error_rates
from .compute_performance_levels import compute_performance_levels
from .save_participant_performance import save_participant_performance
39 changes: 39 additions & 0 deletions apps/analytics/src/modules/participant_performance/compute_performance_levels.py
@@ -0,0 +1,39 @@
def compute_performance_levels(df_performance):
    # compute the quartiles of the first, last, and total error rate distributions
    first_qs = df_performance.firstErrorRate.quantile([0.25, 0.75])
    last_qs = df_performance.lastErrorRate.quantile([0.25, 0.75])
    total_qs = df_performance.totalErrorRate.quantile([0.25, 0.75])

    first_q1 = first_qs[0.25]
    first_q3 = first_qs[0.75]
    last_q1 = last_qs[0.25]
    last_q3 = last_qs[0.75]
    total_q1 = total_qs[0.25]
    total_q3 = total_qs[0.75]

    # set the performance levels based on the quartiles (inverse logic compared
    # to activity - a higher error rate means worse performance)
    df_performance["firstPerformance"] = "MEDIUM"
    df_performance.loc[
        df_performance.firstErrorRate <= first_q1, "firstPerformance"
    ] = "HIGH"
    df_performance.loc[
        df_performance.firstErrorRate >= first_q3, "firstPerformance"
    ] = "LOW"

    df_performance["lastPerformance"] = "MEDIUM"
    df_performance.loc[df_performance.lastErrorRate <= last_q1, "lastPerformance"] = (
        "HIGH"
    )
    df_performance.loc[df_performance.lastErrorRate >= last_q3, "lastPerformance"] = (
        "LOW"
    )

    df_performance["totalPerformance"] = "MEDIUM"
    df_performance.loc[
        df_performance.totalErrorRate <= total_q1, "totalPerformance"
    ] = "HIGH"
    df_performance.loc[
        df_performance.totalErrorRate >= total_q3, "totalPerformance"
    ] = "LOW"

    return df_performance
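
To make the bucketing concrete: a participant at or below the first quartile of an error-rate distribution is rated HIGH (few errors), at or above the third quartile LOW, and MEDIUM otherwise. A minimal sketch with made-up values (not part of the commit), calling the function defined above:

import pandas as pd

# toy input carrying the three error-rate columns the function expects
df = pd.DataFrame(
    {
        "participantId": ["a", "b", "c", "d"],
        "firstErrorRate": [0.1, 0.4, 0.5, 0.9],
        "lastErrorRate": [0.0, 0.2, 0.3, 0.8],
        "totalErrorRate": [0.05, 0.3, 0.4, 0.85],
    }
)

# participant "a" falls at or below all first quartiles -> rated "HIGH";
# participant "d" sits at or above all third quartiles -> rated "LOW"
df = compute_performance_levels(df)
print(df[["participantId", "firstPerformance", "lastPerformance", "totalPerformance"]])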
54 changes: 54 additions & 0 deletions apps/analytics/src/modules/participant_performance/compute_response_error_rates.py
@@ -0,0 +1,54 @@
def compute_response_error_rates(df_responses):
    # compute the error rate for each response itself
    df_responses["responseErrorRate"] = (
        df_responses["wrongCount"] / df_responses["trialsCount"]
    )

    # compute the total number of responses, the number of wrong first and last
    # responses, and the average total error rate per participant
    df_response_count = (
        df_responses.groupby("participantId").size().reset_index(name="responseCount")
    )
    df_first_response_wrong_count = (
        df_responses[df_responses["firstResponseCorrectness"] == "WRONG"]
        .groupby("participantId")
        .size()
        .reset_index(name="wrongFirstResponseCount")
    )
    df_last_response_wrong_count = (
        df_responses[df_responses["lastResponseCorrectness"] == "WRONG"]
        .groupby("participantId")
        .size()
        .reset_index(name="wrongLastResponseCount")
    )
    df_total_error_rate = (
        df_responses[["participantId", "responseErrorRate"]]
        .groupby("participantId")
        .agg("mean")
        .reset_index()
        .rename(
            columns={
                "responseErrorRate": "totalErrorRate",
            }
        )
    )

    # combine the dataframes into a single one
    df_performance = (
        df_response_count.merge(
            df_first_response_wrong_count, on="participantId", how="left"
        )
        .merge(df_last_response_wrong_count, on="participantId", how="left")
        .merge(df_total_error_rate, on="participantId", how="left")
        .fillna(0)
    )

    # compute the first and last error rates
    df_performance["firstErrorRate"] = (
        df_performance["wrongFirstResponseCount"] / df_performance["responseCount"]
    )
    df_performance["lastErrorRate"] = (
        df_performance["wrongLastResponseCount"] / df_performance["responseCount"]
    )

    return df_performance
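
The expected input is one row per participant/question response, with trial counts and the correctness of the first and last attempt. A toy invocation with made-up values (not part of the commit) to show the shape:

import pandas as pd

# two participants with two responses each; column names mirror the fields
# the function reads from the QuestionResponse records
df_responses = pd.DataFrame(
    {
        "participantId": ["a", "a", "b", "b"],
        "trialsCount": [2, 1, 3, 2],
        "wrongCount": [1, 0, 2, 2],
        "firstResponseCorrectness": ["WRONG", "CORRECT", "WRONG", "WRONG"],
        "lastResponseCorrectness": ["CORRECT", "CORRECT", "WRONG", "CORRECT"],
    }
)

# for participant "a": firstErrorRate = 1/2, lastErrorRate = 0/2 (the missing
# wrong-last-response count is filled with 0 by fillna), and
# totalErrorRate = mean(1/2, 0/1) = 0.25
df_performance = compute_response_error_rates(df_responses)
print(df_performance)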
30 changes: 30 additions & 0 deletions apps/analytics/src/modules/participant_performance/save_participant_performance.py
@@ -0,0 +1,30 @@
def save_participant_performance(db, df_performance, course_id):
    # upsert one ParticipantPerformance entry per participant and course
    for _, row in df_performance.iterrows():
        db.participantperformance.upsert(
            where={
                "participantId_courseId": {
                    "participantId": row["participantId"],
                    "courseId": course_id,
                }
            },
            data={
                "create": {
                    "firstErrorRate": row["firstErrorRate"],
                    "firstPerformance": row["firstPerformance"],
                    "lastErrorRate": row["lastErrorRate"],
                    "lastPerformance": row["lastPerformance"],
                    "totalErrorRate": row["totalErrorRate"],
                    "totalPerformance": row["totalPerformance"],
                    "participant": {"connect": {"id": row["participantId"]}},
                    "course": {"connect": {"id": course_id}},
                },
                "update": {
                    "firstErrorRate": row["firstErrorRate"],
                    "firstPerformance": row["firstPerformance"],
                    "lastErrorRate": row["lastErrorRate"],
                    "lastPerformance": row["lastPerformance"],
                    "totalErrorRate": row["totalErrorRate"],
                    "totalPerformance": row["totalPerformance"],
                },
            },
        )
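
The `participantId_courseId` selector in the `where` clause is the compound unique key generated from the `@@unique([participantId, courseId])` constraint added to the Prisma schema further down in this diff, so re-running the computation for a course updates existing rows instead of duplicating them. A smoke-test sketch with a mocked client (not part of the commit):

from unittest.mock import MagicMock

import pandas as pd

# stand-in for the Prisma client; records the upsert calls for inspection
db = MagicMock()

df_performance = pd.DataFrame(
    {
        "participantId": ["a"],
        "firstErrorRate": [0.5],
        "firstPerformance": ["MEDIUM"],
        "lastErrorRate": [0.0],
        "lastPerformance": ["HIGH"],
        "totalErrorRate": [0.25],
        "totalPerformance": ["MEDIUM"],
    }
)

save_participant_performance(db, df_performance, "course-1")
assert db.participantperformance.upsert.call_count == 1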
126 changes: 126 additions & 0 deletions apps/analytics/src/notebooks/participant_performance.ipynb
@@ -0,0 +1,126 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preparation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"from datetime import datetime\n",
"from prisma import Prisma\n",
"import pandas as pd\n",
"import sys\n",
"\n",
"# set the python path correctly for module imports to work\n",
"sys.path.append(\"../../\")\n",
"\n",
"from src.modules.participant_course_analytics.get_running_past_courses import (\n",
" get_running_past_courses,\n",
")\n",
"from src.modules.participant_performance.compute_response_error_rates import (\n",
" compute_response_error_rates,\n",
")\n",
"from src.modules.participant_performance.compute_performance_levels import (\n",
" compute_performance_levels,\n",
")\n",
"from src.modules.participant_performance.save_participant_performance import (\n",
" save_participant_performance,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db = Prisma()\n",
"\n",
"# set the environment variable DATABASE_URL to the connection string of your database\n",
"os.environ[\"DATABASE_URL\"] = \"postgresql://klicker:klicker@localhost:5432/klicker-prod\"\n",
"\n",
"db.connect()\n",
"\n",
"# Script settings\n",
"verbose = False"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compute Participant Performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fetch all courses from the database\n",
"df_courses = get_running_past_courses(db)\n",
"\n",
"# Iterate over the course and fetch all question responses linked to it\n",
"for idx, course in df_courses.iterrows():\n",
" course_id = course[\"id\"]\n",
" print(f\"Processing course\", idx, \"of\", len(df_courses), \"with id\", course_id)\n",
"\n",
" # fetch all question responses linked to this course\n",
" question_responses = db.questionresponse.find_many(where={\"courseId\": course_id})\n",
" df_responses = pd.DataFrame(list(map(lambda x: x.dict(), question_responses)))\n",
"\n",
" # if no responses are linked to the course, skip the iteration\n",
" if df_responses.empty:\n",
" print(\"No responses linked to course\", course_id)\n",
" continue\n",
"\n",
" df_performance = compute_response_error_rates(df_responses)\n",
" df_performance = compute_performance_levels(df_performance)\n",
"\n",
" # store computed performance analytics in the corresponding database table\n",
" save_participant_performance(db, df_performance, course_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Disconnect from the database\n",
"db.disconnect()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "analytics-fkWWeYLw-py3.12",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
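
For environments without Jupyter, the notebook flow condenses to a plain script. A sketch assuming the same module layout and database URL as in the notebook above:

import os
import sys

import pandas as pd
from prisma import Prisma

# set the python path correctly for module imports to work
sys.path.append("../../")

from src.modules.participant_course_analytics.get_running_past_courses import (
    get_running_past_courses,
)
from src.modules.participant_performance import (
    compute_response_error_rates,
    compute_performance_levels,
    save_participant_performance,
)

# point the Prisma client at the target database before connecting
os.environ["DATABASE_URL"] = "postgresql://klicker:klicker@localhost:5432/klicker-prod"

db = Prisma()
db.connect()

for idx, course in get_running_past_courses(db).iterrows():
    course_id = course["id"]

    # fetch all question responses linked to this course
    question_responses = db.questionresponse.find_many(where={"courseId": course_id})
    df_responses = pd.DataFrame(list(map(lambda x: x.dict(), question_responses)))

    # skip courses without any linked responses
    if df_responses.empty:
        continue

    df_performance = compute_response_error_rates(df_responses)
    df_performance = compute_performance_levels(df_performance)
    save_participant_performance(db, df_performance, course_id)

db.disconnect()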
@@ -75,6 +75,9 @@ CREATE TABLE "ActivityProgress" (
CONSTRAINT "ActivityProgress_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
CREATE UNIQUE INDEX "ParticipantPerformance_participantId_courseId_key" ON "ParticipantPerformance"("participantId", "courseId");

-- CreateIndex
CREATE UNIQUE INDEX "InstancePerformance_instanceId_key" ON "InstancePerformance"("instanceId");

2 changes: 2 additions & 0 deletions packages/prisma/src/prisma/schema/analytics.prisma
@@ -188,6 +188,8 @@ model ParticipantPerformance {
  createdAt DateTime @default(now())
  updatedAt DateTime @updatedAt

  @@unique([participantId, courseId])
}

model InstancePerformance {
