Showing 57 changed files with 2,783 additions and 2,725 deletions.
@@ -22,4 +22,5 @@ packages/prisma/src/seed
.turbo

out/
!out/.gitkeep
.rollup.cache/
@@ -0,0 +1,24 @@
# KlickerUZH Analytics

This service computes learning analytics for KlickerUZH, providing insights into student learning patterns and performance metrics.

## Requirements

- Python 3.12.x (e.g., installed through `asdf`)
- Node.js 20.x.x
- Poetry

## Setup

- The project uses Poetry for dependency management and environment isolation. Make sure you have Poetry installed before proceeding, then run `poetry install` in this folder to prepare the virtual environment.
- The project uses PNPM to simplify the execution of scripts and to provide a watch mode during development. Make sure you have run `pnpm install` in the repository before trying to run the commands below.
- Make sure that all `.prisma` files are available in `prisma/`. If this is not the case, run the `util/sync-schema.sh` script first.
- Make sure that a valid Python environment (3.12) is used. If Poetry tries to use an environment that does not match this specification, the install command or script execution might fail. The Python binary can be set explicitly using `poetry env use /Users/.../bin/python` (after which `poetry install` has to be run again). Tools like `asdf` allow the clean management of multiple Python versions on a single machine.

## Available Commands

The following commands are available through PNPM:

- `pnpm generate` - Generate the Prisma client for database access in Python
- `pnpm main` - Run the analytics service
- `pnpm dev` - Start the service in watch mode for development
@@ -0,0 +1,13 @@
{
  "name": "@klicker-uzh/analytics",
  "version": "3.3.0-alpha.8",
  "license": "AGPL-3.0",
  "devDependencies": {
    "nodemon": "~3.1.7"
  },
  "scripts": {
    "dev": "doppler run --config dev -- nodemon --exec 'poetry run poe main' --watch src,prisma --ext py,prisma",
    "generate": "poetry run poe generate",
    "main": "doppler run --config dev -- poetry run poe main"
  }
}
Large diffs are not rendered by default.
@@ -0,0 +1,2 @@
from .modules import *
from .notebooks import *
File renamed without changes.
@@ -0,0 +1,2 @@
from .participant_analytics import compute_correctness, get_participant_responses
from .aggregated_analytics import compute_aggregated_analytics
@@ -0,0 +1,4 @@
from .compute_aggregated_analytics import compute_aggregated_analytics
from .load_participant_analytics import load_participant_analytics
from .aggregate_participant_analytics import aggregate_participant_analytics
from .save_aggregated_analytics import save_aggregated_analytics
apps/analytics/src/modules/aggregated_analytics/aggregate_participant_analytics.py (29 additions, 0 deletions)
@@ -0,0 +1,29 @@
def aggregate_participant_analytics(df_participant_analytics, verbose=False):
    # if the dataframe is empty, return None
    if df_participant_analytics.empty:
        if verbose:
            print("No participant analytics to aggregate")

        return None

    # aggregate all participant analytics for the specified time range and separate courses
    df_aggregated_analytics = (
        df_participant_analytics.groupby("courseId")
        .agg(
            {
                "id": "count",
                "responseCount": "sum",
                "totalScore": "sum",
                "totalPoints": "sum",
                "totalXp": "sum",
            }
        )
        .reset_index()
        .rename(
            columns={
                "id": "participantCount",
            }
        )
    )

    return df_aggregated_analytics
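For illustration, a minimal sketch of this aggregation on made-up data (the column values below are invented; the import path assumes the function is importable from this module):

```python
import pandas as pd

from .aggregate_participant_analytics import aggregate_participant_analytics

# hypothetical participant analytics: two participants in course "c1", one in "c2"
df = pd.DataFrame(
    [
        {"id": "a1", "courseId": "c1", "responseCount": 10, "totalScore": 80, "totalPoints": 100, "totalXp": 40},
        {"id": "a2", "courseId": "c1", "responseCount": 5, "totalScore": 30, "totalPoints": 50, "totalXp": 20},
        {"id": "a3", "courseId": "c2", "responseCount": 7, "totalScore": 70, "totalPoints": 70, "totalXp": 35},
    ]
)

result = aggregate_participant_analytics(df)
# one row per course: "c1" -> participantCount=2, responseCount=15, totalScore=110,
# totalPoints=150, totalXp=60; "c2" -> participantCount=1, ...
print(result)
```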
apps/analytics/src/modules/aggregated_analytics/compute_aggregated_analytics.py (34 additions, 0 deletions)
@@ -0,0 +1,34 @@
from .load_participant_analytics import load_participant_analytics
from .aggregate_participant_analytics import aggregate_participant_analytics
from .save_aggregated_analytics import save_aggregated_analytics


def compute_aggregated_analytics(
    db, start_date, end_date, timestamp, analytics_type="DAILY", verbose=False
):
    # load all participant analytics for the given timestamp and analytics time range
    df_participant_analytics = load_participant_analytics(
        db, timestamp, analytics_type, verbose
    )

    # aggregate all participant analytics values by course
    df_aggregated_analytics = aggregate_participant_analytics(
        df_participant_analytics, verbose
    )

    if df_aggregated_analytics is not None and verbose:
        print("Aggregated analytics for time range: " + start_date + " to " + end_date)
        print(df_aggregated_analytics.head())
    elif df_aggregated_analytics is None:
        print(
            "No aggregated analytics to compute for time range: "
            + start_date
            + " to "
            + end_date
        )

    # store the computed aggregated analytics in the database
    if df_aggregated_analytics is not None:
        save_aggregated_analytics(
            db, df_aggregated_analytics, timestamp, analytics_type
        )
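A hedged sketch of how this entry point might be wired up (the synchronous Prisma client usage and the timestamp string are assumptions mirroring the calls in this module; prisma-client-py also offers an async interface):

```python
from prisma import Prisma  # prisma-client-py, generated via `pnpm generate`

from .compute_aggregated_analytics import compute_aggregated_analytics

db = Prisma()
db.connect()

# hypothetical daily window; the timestamp format mirrors computedAt in save_aggregated_analytics
compute_aggregated_analytics(
    db,
    start_date="2024-01-01",
    end_date="2024-01-01",
    timestamp="2024-01-01T00:00:00.000Z",
    analytics_type="DAILY",
    verbose=True,
)

db.disconnect()
```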
apps/analytics/src/modules/aggregated_analytics/load_participant_analytics.py (30 additions, 0 deletions)
@@ -0,0 +1,30 @@
import pandas as pd


def convert_to_df(analytics):
    # convert the database query result into a pandas dataframe
    rows = []
    for item in analytics:
        rows.append(dict(item))

    return pd.DataFrame(rows)


def load_participant_analytics(db, timestamp, analytics_type, verbose=False):
    participant_analytics = db.participantanalytics.find_many(
        where={"timestamp": timestamp, "type": analytics_type},
    )

    if verbose:
        # print the number of analytics found and the first entry
        print(
            "Found {} {} analytics for timestamp {}".format(
                len(participant_analytics), analytics_type, timestamp
            )
        )
        if participant_analytics:
            print(participant_analytics[0])

    # convert the analytics to a dataframe
    df_loaded_analytics = convert_to_df(participant_analytics)

    return df_loaded_analytics
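To make `convert_to_df` concrete, a small sketch with plain dictionaries standing in for the Prisma result records (real records are model instances that support `dict(item)`):

```python
import pandas as pd

# stand-ins for prisma-client-py result records
records = [
    {"id": "a1", "courseId": "c1", "responseCount": 3},
    {"id": "a2", "courseId": "c1", "responseCount": 5},
]

df = pd.DataFrame([dict(r) for r in records])  # same pattern as convert_to_df
print(df.columns.tolist())  # ['id', 'courseId', 'responseCount']
```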
apps/analytics/src/modules/aggregated_analytics/save_aggregated_analytics.py (108 additions, 0 deletions)
@@ -0,0 +1,108 @@
from datetime import datetime


def save_aggregated_analytics(db, df_analytics, timestamp, analytics_type="DAILY"):
    computedAt = datetime.now().strftime("%Y-%m-%d") + "T00:00:00.000Z"

    # create daily / weekly / monthly analytics entries for all participants
    if analytics_type in ["DAILY", "WEEKLY", "MONTHLY"]:
        for _, row in df_analytics.iterrows():
            db.aggregatedanalytics.upsert(
                where={
                    "type_courseId_timestamp": {
                        "type": analytics_type,
                        "courseId": row["courseId"],
                        "timestamp": timestamp,
                    }
                },
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp,
                        "computedAt": computedAt,
                        "participantCount": row["participantCount"],
                        "responseCount": row["responseCount"],
                        "totalScore": row["totalScore"],
                        "totalPoints": row["totalPoints"],
                        "totalXp": row["totalXp"],
                        # TODO: set this value correctly for rolling updates in production code
                        # (cannot be computed for past learning analytics -> therefore set to invalid value)
                        "totalElementsAvailable": -1,
                        "course": {"connect": {"id": row["courseId"]}},
                    },
                    "update": {},
                },
            )

    # create or update course-wide analytics entries (unique per course and timestamp)
    elif analytics_type == "COURSE":
        for _, row in df_analytics.iterrows():
            course = db.course.find_unique_or_raise(
                where={"id": row["courseId"]},
                include={
                    "practiceQuizzes": {
                        "include": {
                            "stacks": {
                                "include": {"elements": True},
                            }
                        }
                    },
                    "microLearnings": {
                        "include": {
                            "stacks": {
                                "include": {"elements": True},
                            }
                        }
                    },
                },
            )
            course = dict(course)

            # sum the number of elements across all practice quizzes and microlearnings
            totalElementsAvailable = 0
            for practice_quiz in course["practiceQuizzes"]:
                pq_dict = dict(practice_quiz)
                for stack in pq_dict["stacks"]:
                    stack_dict = dict(stack)
                    totalElementsAvailable += len(stack_dict["elements"])
            for microlearning in course["microLearnings"]:
                ml_dict = dict(microlearning)
                for stack in ml_dict["stacks"]:
                    stack_dict = dict(stack)
                    totalElementsAvailable += len(stack_dict["elements"])

            db.aggregatedanalytics.upsert(
                where={
                    "type_courseId_timestamp": {
                        "type": analytics_type,
                        "courseId": row["courseId"],
                        "timestamp": timestamp,
                    }
                },
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp,
                        "computedAt": computedAt,
                        "participantCount": row["participantCount"],
                        "responseCount": row["responseCount"],
                        "totalScore": row["totalScore"],
                        "totalPoints": row["totalPoints"],
                        "totalXp": row["totalXp"],
                        "totalElementsAvailable": totalElementsAvailable,
                        "course": {"connect": {"id": row["courseId"]}},
                    },
                    "update": {
                        "computedAt": computedAt,
                        "participantCount": row["participantCount"],
                        "responseCount": row["responseCount"],
                        "totalScore": row["totalScore"],
                        "totalPoints": row["totalPoints"],
                        "totalXp": row["totalXp"],
                        "totalElementsAvailable": totalElementsAvailable,
                    },
                },
            )

    else:
        raise ValueError("Unknown analytics type: {}".format(analytics_type))
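For completeness, a hedged sketch of a course-wide run (the DataFrame values are invented; `analytics_type="COURSE"` exercises the element-counting branch above, and the upsert keeps repeated runs idempotent):

```python
import pandas as pd
from prisma import Prisma  # prisma-client-py, generated via `pnpm generate`

from .save_aggregated_analytics import save_aggregated_analytics

db = Prisma()
db.connect()

# hypothetical aggregated values for a single course
df = pd.DataFrame(
    [
        {
            "courseId": "c1",
            "participantCount": 2,
            "responseCount": 15,
            "totalScore": 110,
            "totalPoints": 150,
            "totalXp": 60,
        }
    ]
)

save_aggregated_analytics(db, df, "2024-01-01T00:00:00.000Z", analytics_type="COURSE")

db.disconnect()
```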
@@ -0,0 +1,5 @@
from .compute_correctness import compute_correctness
from .get_participant_responses import get_participant_responses
from .aggregate_analytics import aggregate_analytics
from .save_participant_analytics import save_participant_analytics
from .compute_participant_course_analytics import compute_participant_course_analytics