Commit eb0b369
Merge 5d3dec5 into cb471c3
sjschlapbach authored Dec 6, 2024
2 parents cb471c3 + 5d3dec5
Showing 10 changed files with 372 additions and 14 deletions.
20 changes: 6 additions & 14 deletions apps/analytics/src/modules/__init__.py
@@ -1,14 +1,6 @@
from .participant_analytics import compute_correctness, get_participant_responses
from .aggregated_analytics import compute_aggregated_analytics
from .participant_course_analytics import (
get_running_past_courses,
get_active_weeks,
compute_participant_activity,
save_participant_course_analytics,
)
from .aggregated_course_analytics import compute_weekday_activity
from .participant_performance import (
compute_response_error_rates,
compute_performance_levels,
save_participant_performance,
)
from .participant_analytics import *
from .aggregated_analytics import *
from .participant_course_analytics import *
from .aggregated_course_analytics import *
from .participant_performance import *
from .instance_activity_performance import *
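The switch to wildcard re-exports means the individual helpers can be imported either through the package root or via their full module path, as the notebook further down does. A minimal sketch of both styles, assuming apps/analytics is on the Python path (the path below is a hypothetical checkout location):

import sys

# hypothetical path to the analytics app; adjust to the local checkout
sys.path.append("apps/analytics")

# full module path, as used in the notebook below
from src.modules.instance_activity_performance.agg_activity_performance import (
    agg_activity_performance,
)

# equivalent imports via the package-level wildcard re-exports introduced here
from src.modules import compute_instance_performance, save_activity_performance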
5 changes: 5 additions & 0 deletions apps/analytics/src/modules/instance_activity_performance/__init__.py
@@ -0,0 +1,5 @@
from .get_course_activities import get_course_activities
from .compute_instance_performance import compute_instance_performance
from .agg_activity_performance import agg_activity_performance
from .save_instance_performances import save_instance_performances
from .save_activity_performance import save_activity_performance
6 changes: 6 additions & 0 deletions apps/analytics/src/modules/instance_activity_performance/agg_activity_performance.py
@@ -0,0 +1,6 @@
def agg_activity_performance(df_instance_performance):
    # aggregate per-instance metrics into a single activity-level result
    # (column-wise mean over all instances of the activity)
    activity_performance = df_instance_performance.mean()

    # the instance identifier is not a metric and is dropped from the aggregate;
    # the result stays a pandas Series so that callers can read it via attribute
    # access (e.g. activity_performance.totalErrorRate in save_activity_performance)
    activity_performance.drop("instanceId", inplace=True)

    return activity_performance
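As a quick illustration of what the aggregation returns, a minimal sketch with made-up per-instance rates (the DataFrame mirrors the columns produced by compute_instance_performance below); the column-wise mean yields a pandas Series keyed by metric name, which is why save_activity_performance can read the result with attribute access:

import pandas as pd

# illustrative per-instance rates for two instances of the same activity
df_instance_performance = pd.DataFrame(
    {
        "instanceId": [1, 2],
        "responseCount": [10, 20],
        "totalErrorRate": [0.25, 0.75],
        "totalPartialRate": [0.25, 0.0],
        "totalCorrectRate": [0.5, 0.25],
    }
)

# agg_activity_performance as defined above
activity_performance = agg_activity_performance(df_instance_performance)

print(activity_performance.totalErrorRate)   # 0.5, the mean of the two instance rates
print("instanceId" in activity_performance)  # False, the identifier is dropped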
110 changes: 110 additions & 0 deletions apps/analytics/src/modules/instance_activity_performance/compute_instance_performance.py
@@ -0,0 +1,110 @@
import pandas as pd


def compute_instance_performance(db, activity, total_only=False):
# initialize dataframes for performance tracking
df_instance_performance = pd.DataFrame(
columns=[
"instanceId",
"responseCount",
"firstErrorRate",
"firstPartialRate",
"firstCorrectRate",
"lastErrorRate",
"lastPartialRate",
"lastCorrectRate",
"totalErrorRate",
"totalPartialRate",
"totalCorrectRate",
]
)

for stack in activity["stacks"]:
for instance in stack["elements"]:
df_responses = pd.DataFrame(instance["responses"])

if df_responses.empty:
continue

# count number of responses
num_responses = len(df_responses)

if not total_only:
# compute correctness rates for first and last response
first_error_rate = (
df_responses["firstResponseCorrectness"]
.value_counts()
.get("WRONG", 0)
/ num_responses
)
first_partial_rate = (
df_responses["firstResponseCorrectness"]
.value_counts()
.get("PARTIAL", 0)
/ num_responses
)
first_correct_rate = (
df_responses["firstResponseCorrectness"]
.value_counts()
.get("CORRECT", 0)
/ num_responses
)
last_error_rate = (
df_responses["lastResponseCorrectness"]
.value_counts()
.get("WRONG", 0)
/ num_responses
)
last_partial_rate = (
df_responses["lastResponseCorrectness"]
.value_counts()
.get("PARTIAL", 0)
/ num_responses
)
last_correct_rate = (
df_responses["lastResponseCorrectness"]
.value_counts()
.get("CORRECT", 0)
/ num_responses
)

# compute total correctness rates
df_responses["responseErrorRate"] = (
df_responses["wrongCount"] / df_responses["trialsCount"]
)
df_responses["responsePartialRate"] = (
df_responses["partialCorrectCount"] / df_responses["trialsCount"]
)
df_responses["responseCorrectRate"] = (
df_responses["correctCount"] / df_responses["trialsCount"]
)
total_error_rate = df_responses["responseErrorRate"].mean()
total_partial_rate = df_responses["responsePartialRate"].mean()
total_correct_rate = df_responses["responseCorrectRate"].mean()

# append instance values to dataframe
instance_performance = {
"instanceId": instance["id"],
"responseCount": num_responses,
"totalErrorRate": total_error_rate,
"totalPartialRate": total_partial_rate,
"totalCorrectRate": total_correct_rate,
}

if not total_only:
instance_performance.update(
{
"firstErrorRate": first_error_rate,
"firstPartialRate": first_partial_rate,
"firstCorrectRate": first_correct_rate,
"lastErrorRate": last_error_rate,
"lastPartialRate": last_partial_rate,
"lastCorrectRate": last_correct_rate,
}
)

df_instance_performance.loc[len(df_instance_performance)] = (
instance_performance
)

return df_instance_performance
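For reference, a minimal sketch of calling this function directly with a hand-built activity dict that follows the nested shape returned by get_course_activities (stacks -> elements -> responses); all field values are illustrative:

# the db argument is not used by the computation itself, so None suffices here
activity = {
    "stacks": [
        {
            "elements": [
                {
                    "id": 1,
                    "responses": [
                        {
                            "firstResponseCorrectness": "WRONG",
                            "lastResponseCorrectness": "CORRECT",
                            "wrongCount": 2,
                            "partialCorrectCount": 0,
                            "correctCount": 1,
                            "trialsCount": 3,
                        },
                        {
                            "firstResponseCorrectness": "CORRECT",
                            "lastResponseCorrectness": "CORRECT",
                            "wrongCount": 0,
                            "partialCorrectCount": 0,
                            "correctCount": 2,
                            "trialsCount": 2,
                        },
                    ],
                }
            ]
        }
    ]
}

df = compute_instance_performance(None, activity)
print(df.loc[0, "firstErrorRate"])    # 0.5, one of the two first responses was WRONG
print(df.loc[0, "totalCorrectRate"])  # 0.666..., the mean of 1/3 and 2/2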
14 changes: 14 additions & 0 deletions apps/analytics/src/modules/instance_activity_performance/get_course_activities.py
@@ -0,0 +1,14 @@
def get_course_activities(db, course_id):
pqs = db.practicequiz.find_many(
where={"courseId": course_id},
include={"stacks": {"include": {"elements": {"include": {"responses": True}}}}},
)
pqs = list(map(lambda x: x.dict(), pqs))

mls = db.microlearning.find_many(
where={"courseId": course_id},
include={"stacks": {"include": {"elements": {"include": {"responses": True}}}}},
)
mls = list(map(lambda x: x.dict(), mls))

return pqs, mls
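A short usage sketch, assuming a connected Prisma client configured as in the notebook below; the course id is a hypothetical placeholder. Because the results are converted to plain dicts, the nested responses can be fed straight into compute_instance_performance:

from prisma import Prisma

db = Prisma()
db.connect()

# hypothetical course id, replace with a real one from the Course table
pqs, mls = get_course_activities(db, "some-course-id")

# each entry is a plain dict with nested stacks -> elements -> responses
for quiz in pqs:
    print(quiz["id"], sum(len(stack["elements"]) for stack in quiz["stacks"]))

db.disconnect()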
41 changes: 41 additions & 0 deletions apps/analytics/src/modules/instance_activity_performance/save_activity_performance.py
@@ -0,0 +1,41 @@
def save_activity_performance(
db, activity_performance, course_id, practice_quiz_id=None, microlearning_id=None
):
values = {
"totalErrorRate": activity_performance.totalErrorRate,
"totalPartialRate": activity_performance.totalPartialRate,
"totalCorrectRate": activity_performance.totalCorrectRate,
}

if practice_quiz_id is not None:
values.update(
{
"firstErrorRate": activity_performance.firstErrorRate,
"firstPartialRate": activity_performance.firstPartialRate,
"firstCorrectRate": activity_performance.firstCorrectRate,
"lastErrorRate": activity_performance.lastErrorRate,
"lastPartialRate": activity_performance.lastPartialRate,
"lastCorrectRate": activity_performance.lastCorrectRate,
}
)

create_values = values.copy()
create_values["practiceQuiz"] = {"connect": {"id": practice_quiz_id}}
create_values["course"] = {"connect": {"id": course_id}}
where_clause = {"practiceQuizId": practice_quiz_id}

elif microlearning_id is not None:
create_values = values.copy()
create_values["microLearning"] = {"connect": {"id": microlearning_id}}
create_values["course"] = {"connect": {"id": course_id}}
where_clause = {"microLearningId": microlearning_id}

else:
raise ValueError(
"Either practice_quiz_id or microlearning_id must be provided for activity performance creation/update"
)

db.activityperformance.upsert(
where=where_clause,
data={"create": create_values, "update": values},
)
43 changes: 43 additions & 0 deletions apps/analytics/src/modules/instance_activity_performance/save_instance_performances.py
@@ -0,0 +1,43 @@
def save_instance_performances(
db, df_instance_performance, course_id, total_only=False
):
for _, row in df_instance_performance.iterrows():
# extract values from dataframe
values = {
"responseCount": row["responseCount"],
"totalErrorRate": row["totalErrorRate"],
"totalPartialRate": row["totalPartialRate"],
"totalCorrectRate": row["totalCorrectRate"],
}

# only define first and last response rates if applicable
if not total_only:
values.update(
{
"firstErrorRate": row["firstErrorRate"],
"firstPartialRate": row["firstPartialRate"],
"firstCorrectRate": row["firstCorrectRate"],
"lastErrorRate": row["lastErrorRate"],
"lastPartialRate": row["lastPartialRate"],
"lastCorrectRate": row["lastCorrectRate"],
}
)

# add relational links during creation
create_values = values.copy()
create_values.update(
{
"instance": {"connect": {"id": row["instanceId"]}},
"course": {"connect": {"id": course_id}},
}
)

db.instanceperformance.upsert(
where={
"instanceId": row["instanceId"],
},
data={
"create": create_values,
"update": values,
},
)
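One note on the design: because the upsert is keyed on instanceId (which the where clause treats as a unique field), re-running the analytics for a course updates the existing rows instead of duplicating them. A minimal, hypothetical sketch of a rerun, assuming a connected client and a df_instance_performance computed as above:

# running the save twice leaves a single row per instance,
# with the metric columns refreshed on the second pass
save_instance_performances(db, df_instance_performance, course_id)
save_instance_performances(db, df_instance_performance, course_id)

# fetch one of the upserted rows back; find_unique requires a unique field,
# which is why instanceId can also serve as the upsert key above
row = db.instanceperformance.find_unique(where={"instanceId": 1})
print(row.totalCorrectRate)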
144 changes: 144 additions & 0 deletions apps/analytics/src/notebooks/instance_activity_performance.ipynb
@@ -0,0 +1,144 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preparation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"from datetime import datetime\n",
"from prisma import Prisma\n",
"import pandas as pd\n",
"import sys\n",
"\n",
"# set the python path correctly for module imports to work\n",
"sys.path.append(\"../../\")\n",
"\n",
"from src.modules.participant_course_analytics.get_running_past_courses import (\n",
" get_running_past_courses,\n",
")\n",
"from src.modules.instance_activity_performance.get_course_activities import get_course_activities\n",
"from src.modules.instance_activity_performance.compute_instance_performance import compute_instance_performance\n",
"from src.modules.instance_activity_performance.agg_activity_performance import agg_activity_performance\n",
"from src.modules.instance_activity_performance.save_instance_performances import save_instance_performances\n",
"from src.modules.instance_activity_performance.save_activity_performance import save_activity_performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db = Prisma()\n",
"\n",
"# set the environment variable DATABASE_URL to the connection string of your database\n",
"os.environ[\"DATABASE_URL\"] = \"postgresql://klicker:klicker@localhost:5432/klicker-prod\"\n",
"\n",
"db.connect()\n",
"\n",
"# Script settings\n",
"verbose = False"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compute Participant Performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fetch all courses from the database\n",
"df_courses = get_running_past_courses(db)\n",
"\n",
"# Iterate over the course and fetch all question responses linked to it\n",
"for idx, course in df_courses.iterrows():\n",
" course_id = course[\"id\"]\n",
" print(f\"Processing course\", idx, \"of\", len(df_courses), \"with id\", course_id)\n",
"\n",
" # fetch all practice quizzes and microlearnings linked to the course\n",
" pqs, mls = get_course_activities(db, course_id)\n",
"\n",
" for quiz in pqs:\n",
" # compute instance performances\n",
" df_instance_performance = compute_instance_performance(db, quiz)\n",
"\n",
" # if no instances with values were found, skip the activity\n",
" if df_instance_performance.empty:\n",
" continue\n",
"\n",
" # compute the activity performance by aggregating the all instance performances\n",
" activity_performance = agg_activity_performance(df_instance_performance)\n",
"\n",
" # save instance performance data\n",
" save_instance_performances(db, df_instance_performance, course_id)\n",
"\n",
" # save activity performance data\n",
" save_activity_performance(db, activity_performance, course_id, practice_quiz_id=quiz[\"id\"])\n",
"\n",
" for ml in mls:\n",
" # compute instance performances\n",
" df_instance_performance = compute_instance_performance(db, ml, total_only=True)\n",
"\n",
" # if no instances with values were found, skip the activity\n",
" if df_instance_performance.empty:\n",
" continue\n",
"\n",
" # compute the activity performance by aggregating the all instance performances\n",
" activity_performance = agg_activity_performance(df_instance_performance)\n",
"\n",
" # save instance performance data\n",
" save_instance_performances(db, df_instance_performance, course_id, total_only=True)\n",
"\n",
" # save activity performance data\n",
" save_activity_performance(db, activity_performance, course_id, microlearning_id=ml[\"id\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Disconnect from the database\n",
"db.disconnect()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "analytics-fkWWeYLw-py3.12",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -21,6 +21,7 @@ CREATE TABLE "ParticipantPerformance" (
-- CreateTable
CREATE TABLE "InstancePerformance" (
"id" SERIAL NOT NULL,
"responseCount" INTEGER NOT NULL,
"firstErrorRate" REAL,
"firstPartialRate" REAL,
"firstCorrectRate" REAL,