-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
enhance(apps/analytics): add computation logic for participant course performance (#4390)
- Loading branch information
1 parent
5f402d7
commit cb471c3
Showing
8 changed files
with
262 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3 changes: 3 additions & 0 deletions
3
apps/analytics/src/modules/participant_performance/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .compute_response_error_rates import compute_response_error_rates | ||
from .compute_performance_levels import compute_performance_levels | ||
from .save_participant_performance import save_participant_performance |
39 changes: 39 additions & 0 deletions
39
apps/analytics/src/modules/participant_performance/compute_performance_levels.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
def compute_performance_levels(df_performance):
    """Classify participants into performance levels per error-rate metric.

    For each of the ``firstErrorRate``, ``lastErrorRate`` and
    ``totalErrorRate`` columns, the matching ``*Performance`` column is set
    based on the column's own quartiles:

    - ``"HIGH"``   if the error rate is at or below the first quartile (Q1),
    - ``"LOW"``    if the error rate is at or above the third quartile (Q3),
    - ``"MEDIUM"`` otherwise.

    Note the inverse logic compared to activity levels: a HIGHER error rate
    means WORSE performance. When Q1 == Q3 (e.g. a constant column), values
    matching both thresholds end up ``"LOW"`` because the LOW assignment
    happens last.

    Args:
        df_performance: DataFrame with ``firstErrorRate``, ``lastErrorRate``
            and ``totalErrorRate`` columns (as produced by
            ``compute_response_error_rates``). Modified in place.

    Returns:
        The same DataFrame with ``firstPerformance``, ``lastPerformance``
        and ``totalPerformance`` columns added.
    """
    # map each error-rate column to the performance column it determines
    metric_columns = [
        ("firstErrorRate", "firstPerformance"),
        ("lastErrorRate", "lastPerformance"),
        ("totalErrorRate", "totalPerformance"),
    ]

    for rate_col, level_col in metric_columns:
        quartiles = df_performance[rate_col].quantile([0.25, 0.75])
        q1 = quartiles[0.25]
        q3 = quartiles[0.75]

        # default everyone to MEDIUM, then overwrite the two tails
        df_performance[level_col] = "MEDIUM"
        df_performance.loc[df_performance[rate_col] <= q1, level_col] = "HIGH"
        df_performance.loc[df_performance[rate_col] >= q3, level_col] = "LOW"

    return df_performance
54 changes: 54 additions & 0 deletions
54
apps/analytics/src/modules/participant_performance/compute_response_error_rates.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
def _count_wrong_responses(df_responses, correctness_column, count_name):
    """Count per participant how many responses have a WRONG *correctness_column*."""
    wrong_responses = df_responses[df_responses[correctness_column] == "WRONG"]
    return (
        wrong_responses.groupby("participantId").size().reset_index(name=count_name)
    )


def compute_response_error_rates(df_responses):
    """Aggregate per-participant error rates from individual question responses.

    Args:
        df_responses: DataFrame with one row per response, containing at
            least ``participantId``, ``wrongCount``, ``trialsCount``,
            ``firstResponseCorrectness`` and ``lastResponseCorrectness``
            columns. A ``responseErrorRate`` column is added in place.

    Returns:
        DataFrame with one row per participant and the columns
        ``participantId``, ``responseCount``, ``wrongFirstResponseCount``,
        ``wrongLastResponseCount``, ``totalErrorRate``, ``firstErrorRate``
        and ``lastErrorRate``.
    """
    # error rate of each individual response (fraction of wrong trials)
    df_responses["responseErrorRate"] = (
        df_responses["wrongCount"] / df_responses["trialsCount"]
    )

    # total number of responses per participant
    df_response_count = (
        df_responses.groupby("participantId").size().reset_index(name="responseCount")
    )

    # number of wrong first / last responses per participant
    df_first_response_wrong_count = _count_wrong_responses(
        df_responses, "firstResponseCorrectness", "wrongFirstResponseCount"
    )
    df_last_response_wrong_count = _count_wrong_responses(
        df_responses, "lastResponseCorrectness", "wrongLastResponseCount"
    )

    # average error rate over all of a participant's responses
    df_total_error_rate = (
        df_responses[["participantId", "responseErrorRate"]]
        .groupby("participantId")
        .agg("mean")
        .reset_index()
        .rename(
            columns={
                "responseErrorRate": "totalErrorRate",
            }
        )
    )

    # combine the aggregates into a single frame; participants with no wrong
    # first/last responses are absent from the wrong-count frames, so the
    # left merges leave NaN which fillna turns into 0
    df_performance = (
        df_response_count.merge(
            df_first_response_wrong_count, on="participantId", how="left"
        )
        .merge(df_last_response_wrong_count, on="participantId", how="left")
        .merge(df_total_error_rate, on="participantId", how="left")
        .fillna(0)
    )

    # fraction of questions answered wrong on the first / last attempt
    df_performance["firstErrorRate"] = (
        df_performance["wrongFirstResponseCount"] / df_performance["responseCount"]
    )
    df_performance["lastErrorRate"] = (
        df_performance["wrongLastResponseCount"] / df_performance["responseCount"]
    )

    return df_performance
30 changes: 30 additions & 0 deletions
30
apps/analytics/src/modules/participant_performance/save_participant_performance.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
def save_participant_performance(db, df_performance, course_id):
    """Upsert one participant-performance row per participant for a course.

    Args:
        db: Prisma client exposing a ``participantperformance`` model.
        df_performance: DataFrame with one row per participant, carrying
            ``participantId`` plus the six error-rate / performance-level
            columns produced by ``compute_response_error_rates`` and
            ``compute_performance_levels``.
        course_id: id of the course the performance data belongs to.
    """
    for _, row in df_performance.iterrows():
        # scalar fields shared by the create and update branches of the upsert
        performance_fields = {
            "firstErrorRate": row["firstErrorRate"],
            "firstPerformance": row["firstPerformance"],
            "lastErrorRate": row["lastErrorRate"],
            "lastPerformance": row["lastPerformance"],
            "totalErrorRate": row["totalErrorRate"],
            "totalPerformance": row["totalPerformance"],
        }
        db.participantperformance.upsert(
            where={
                "participantId_courseId": {
                    "participantId": row["participantId"],
                    "courseId": course_id,
                }
            },
            data={
                "create": {
                    **performance_fields,
                    # relations are only set when the row is first created
                    "participant": {"connect": {"id": row["participantId"]}},
                    "course": {"connect": {"id": course_id}},
                },
                "update": dict(performance_fields),
            },
        )
126 changes: 126 additions & 0 deletions
126
apps/analytics/src/notebooks/participant_performance.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Preparation" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import json\n", | ||
"from datetime import datetime\n", | ||
"from prisma import Prisma\n", | ||
"import pandas as pd\n", | ||
"import sys\n", | ||
"\n", | ||
"# set the python path correctly for module imports to work\n", | ||
"sys.path.append(\"../../\")\n", | ||
"\n", | ||
"from src.modules.participant_course_analytics.get_running_past_courses import (\n", | ||
" get_running_past_courses,\n", | ||
")\n", | ||
"from src.modules.participant_performance.compute_response_error_rates import (\n", | ||
" compute_response_error_rates,\n", | ||
")\n", | ||
"from src.modules.participant_performance.compute_performance_levels import (\n", | ||
" compute_performance_levels,\n", | ||
")\n", | ||
"from src.modules.participant_performance.save_participant_performance import (\n", | ||
" save_participant_performance,\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"db = Prisma()\n", | ||
"\n", | ||
"# set the environment variable DATABASE_URL to the connection string of your database\n", | ||
"os.environ[\"DATABASE_URL\"] = \"postgresql://klicker:klicker@localhost:5432/klicker-prod\"\n", | ||
"\n", | ||
"db.connect()\n", | ||
"\n", | ||
"# Script settings\n", | ||
"verbose = False" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Compute Participant Performance" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Fetch all courses from the database\n", | ||
"df_courses = get_running_past_courses(db)\n", | ||
"\n", | ||
"# Iterate over the course and fetch all question responses linked to it\n", | ||
"for idx, course in df_courses.iterrows():\n", | ||
" course_id = course[\"id\"]\n", | ||
"    print(\"Processing course\", idx, \"of\", len(df_courses), \"with id\", course_id)\n", | ||
"\n", | ||
" # fetch all question responses linked to this course\n", | ||
" question_responses = db.questionresponse.find_many(where={\"courseId\": course_id})\n", | ||
" df_responses = pd.DataFrame(list(map(lambda x: x.dict(), question_responses)))\n", | ||
"\n", | ||
" # if no responses are linked to the course, skip the iteration\n", | ||
" if df_responses.empty:\n", | ||
" print(\"No responses linked to course\", course_id)\n", | ||
" continue\n", | ||
"\n", | ||
" df_performance = compute_response_error_rates(df_responses)\n", | ||
" df_performance = compute_performance_levels(df_performance)\n", | ||
"\n", | ||
" # store computed performance analytics in the corresponding database table\n", | ||
" save_participant_performance(db, df_performance, course_id)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Disconnect from the database\n", | ||
"db.disconnect()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "analytics-fkWWeYLw-py3.12", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters