
Commit

Merge 7113565 into 51bbba9
sjschlapbach authored Dec 3, 2024
2 parents 51bbba9 + 7113565 commit cfe3c16
Showing 57 changed files with 2,783 additions and 2,725 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -22,4 +22,5 @@ packages/prisma/src/seed
 .turbo
 
 out/
+!out/.gitkeep
 .rollup.cache/
24 changes: 24 additions & 0 deletions apps/analytics/README.md
@@ -0,0 +1,24 @@
# KlickerUZH Analytics

This service computes learning analytics for KlickerUZH, providing insights into student learning patterns and performance metrics.

## Requirements

- Python 3.12.x (e.g., installed through `asdf`)
- Node.js 20.x.x
- Poetry

## Setup

- The project uses Poetry for dependency management and environment isolation. Make sure you have Poetry installed before proceeding. Then run `poetry install` in this folder to prepare the virtual environment.
- The project uses PNPM to simplify script execution and to provide a watch mode during development. Make sure that you have executed `pnpm install` in the repository before trying to run the commands below.
- Make sure that all `.prisma` files are available in `prisma/`. If this is not the case, run the `util/sync-schema.sh` script first.
- Make sure that a valid Python environment (3.12) is used. If Poetry tries to use an environment that does not match this specification, the install command or script execution might fail. The Python binary to use can be set explicitly with `poetry env use /Users/.../bin/python` (after which `poetry install` has to be run again). Tools like `asdf` allow clean management of multiple Python versions on a single machine.

## Available Commands

The following commands are available through PNPM:

- `pnpm generate` - Generate the Prisma client for database access in Python
- `pnpm main` - Run the analytics service
- `pnpm dev` - Start the service in watch mode for development
13 changes: 13 additions & 0 deletions apps/analytics/package.json
@@ -0,0 +1,13 @@
{
  "name": "@klicker-uzh/analytics",
  "version": "3.3.0-alpha.8",
  "license": "AGPL-3.0",
  "devDependencies": {
    "nodemon": "~3.1.7"
  },
  "scripts": {
    "dev": "doppler run --config dev -- nodemon --exec 'poetry run poe main' --watch src,prisma --ext py,prisma",
    "generate": "poetry run poe generate",
    "main": "doppler run --config dev -- poetry run poe main"
  }
}
1,053 changes: 548 additions & 505 deletions apps/analytics/poetry.lock

Large diffs are not rendered by default.

16 changes: 11 additions & 5 deletions apps/analytics/pyproject.toml
@@ -2,25 +2,31 @@
 name = "@klicker-uzh/analytics"
 version = "0.0.1"
 description = ""
-authors = ["Roland Schlaefli <roland.schlaefli@df.uzh.ch>"]
+authors = ["Roland Schlaefli <roland.schlaefli@df.uzh.ch>", "Julius Schlapbach <julius.schlapbach@df.uzh.ch>"]
 license = "AGPL-3.0"
 readme = "README.md"
-packages = [{include = "@klicker_uzh"}]
+package-mode = false
 
 [tool.poetry.dependencies]
 python = "^3.12"
 pandas = "2.2.2"
-prisma = "0.14.0"
-xlsxwriter = "^3.2.0"
+prisma = "0.15.0"
+xlsxwriter = "3.2.0"
 
 [tool.poetry.dev-dependencies]
 poethepoet = "0.27.0"
 ipykernel = "6.29.5"
 
+[tool.poetry.group.dev.dependencies]
+pyright = "1.1.376"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poe.tasks]
 generate = "prisma generate"
-main = "doppler run --config dev -- python main.py"
+main = "doppler run --config dev -- python -m src.main"
+
+[tool.pyright]
+typeCheckingMode = "strict"
2 changes: 2 additions & 0 deletions apps/analytics/src/__init__.py
@@ -0,0 +1,2 @@
from .modules import *
from .notebooks import *
File renamed without changes.
2 changes: 2 additions & 0 deletions apps/analytics/src/modules/__init__.py
@@ -0,0 +1,2 @@
from .participant_analytics import compute_correctness, get_participant_responses
from .aggregated_analytics import compute_aggregated_analytics
4 changes: 4 additions & 0 deletions apps/analytics/src/modules/aggregated_analytics/__init__.py
@@ -0,0 +1,4 @@
from .compute_aggregated_analytics import compute_aggregated_analytics
from .load_participant_analytics import load_participant_analytics
from .aggregate_participant_analytics import aggregate_participant_analytics
from .save_aggregated_analytics import save_aggregated_analytics
29 changes: 29 additions & 0 deletions apps/analytics/src/modules/aggregated_analytics/aggregate_participant_analytics.py
@@ -0,0 +1,29 @@
def aggregate_participant_analytics(df_participant_analytics, verbose=False):
    # if the dataframe is empty, return None
    if df_participant_analytics.empty:
        if verbose:
            print("No participant analytics to aggregate")

        return None

    # aggregate all participant analytics for the specified time range, separately for each course
    df_aggregated_analytics = (
        df_participant_analytics.groupby("courseId")
        .agg(
            {
                "id": "count",
                "responseCount": "sum",
                "totalScore": "sum",
                "totalPoints": "sum",
                "totalXp": "sum",
            }
        )
        .reset_index()
        .rename(
            columns={
                "id": "participantCount",
            }
        )
    )

    return df_aggregated_analytics
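For illustration, a minimal sketch of this aggregation on hypothetical input (the column names follow the `groupby`/`agg` calls above; the import path assumes the package layout of this diff and execution from `apps/analytics`):

```python
import pandas as pd

from src.modules.aggregated_analytics.aggregate_participant_analytics import (
    aggregate_participant_analytics,
)

# hypothetical analytics: two participants in course "c1", one in "c2"
df = pd.DataFrame(
    [
        {"id": 1, "courseId": "c1", "responseCount": 10, "totalScore": 50, "totalPoints": 80, "totalXp": 120},
        {"id": 2, "courseId": "c1", "responseCount": 5, "totalScore": 20, "totalPoints": 30, "totalXp": 40},
        {"id": 3, "courseId": "c2", "responseCount": 7, "totalScore": 35, "totalPoints": 50, "totalXp": 70},
    ]
)

print(aggregate_participant_analytics(df))
# "c1" ends up with participantCount 2 and summed metrics, "c2" with participantCount 1
```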
34 changes: 34 additions & 0 deletions apps/analytics/src/modules/aggregated_analytics/compute_aggregated_analytics.py
@@ -0,0 +1,34 @@
from .load_participant_analytics import load_participant_analytics
from .aggregate_participant_analytics import aggregate_participant_analytics
from .save_aggregated_analytics import save_aggregated_analytics


def compute_aggregated_analytics(
    db, start_date, end_date, timestamp, analytics_type="DAILY", verbose=False
):
    # load all participant analytics for the given timestamp and analytics time range
    df_participant_analytics = load_participant_analytics(
        db, timestamp, analytics_type, verbose
    )

    # aggregate all participant analytics values by course
    df_aggregated_analytics = aggregate_participant_analytics(
        df_participant_analytics, verbose
    )

    if df_aggregated_analytics is not None and verbose:
        print(f"Aggregated analytics for time range: {start_date} to {end_date}")
        print(df_aggregated_analytics.head())
    elif df_aggregated_analytics is None:
        print(
            f"No aggregated analytics to compute for time range: {start_date} to {end_date}"
        )

    # store the computed aggregated analytics in the database
    if df_aggregated_analytics is not None:
        save_aggregated_analytics(
            db, df_aggregated_analytics, timestamp, analytics_type
        )
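A hedged usage sketch (assuming the generated Prisma Python client and the ISO-8601 timestamp strings used elsewhere in this service; the date values are placeholders):

```python
from prisma import Prisma

from src.modules.aggregated_analytics.compute_aggregated_analytics import (
    compute_aggregated_analytics,
)

db = Prisma()
db.connect()

# aggregate one day of participant analytics into daily course-level entries
compute_aggregated_analytics(
    db,
    start_date="2024-12-02T00:00:00.000Z",
    end_date="2024-12-03T00:00:00.000Z",
    timestamp="2024-12-02T00:00:00.000Z",
    analytics_type="DAILY",
    verbose=True,
)

db.disconnect()
```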
30 changes: 30 additions & 0 deletions apps/analytics/src/modules/aggregated_analytics/load_participant_analytics.py
@@ -0,0 +1,30 @@
import pandas as pd


def convert_to_df(analytics):
    # convert the database query result into a pandas dataframe
    rows = []
    for item in analytics:
        rows.append(dict(item))

    return pd.DataFrame(rows)


def load_participant_analytics(db, timestamp, analytics_type, verbose=False):
    participant_analytics = db.participantanalytics.find_many(
        where={"timestamp": timestamp, "type": analytics_type},
    )

    if verbose:
        # print the number of matching analytics and the first entry
        print(
            "Found {} analytics for timestamp {} and type {}".format(
                len(participant_analytics), timestamp, analytics_type
            )
        )
        if participant_analytics:
            print(participant_analytics[0])

    # convert the analytics to a dataframe
    df_loaded_analytics = convert_to_df(participant_analytics)

    return df_loaded_analytics
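`convert_to_df` only needs dict-convertible records, so it can be sanity-checked without a database (a sketch with hypothetical rows):

```python
import pandas as pd


def convert_to_df(analytics):
    # same behavior as above, written as a comprehension
    return pd.DataFrame([dict(item) for item in analytics])


records = [
    {"id": 1, "courseId": "c1", "responseCount": 10},
    {"id": 2, "courseId": "c2", "responseCount": 5},
]
print(convert_to_df(records))
# a 2x3 dataframe with columns id, courseId, responseCount
```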
108 changes: 108 additions & 0 deletions apps/analytics/src/modules/aggregated_analytics/save_aggregated_analytics.py
@@ -0,0 +1,108 @@
from datetime import datetime


def save_aggregated_analytics(db, df_analytics, timestamp, analytics_type="DAILY"):
    computedAt = datetime.now().strftime("%Y-%m-%d") + "T00:00:00.000Z"

    # create daily / weekly / monthly analytics entries for all participants
    if analytics_type in ["DAILY", "WEEKLY", "MONTHLY"]:
        for _, row in df_analytics.iterrows():
            db.aggregatedanalytics.upsert(
                where={
                    "type_courseId_timestamp": {
                        "type": analytics_type,
                        "courseId": row["courseId"],
                        "timestamp": timestamp,
                    }
                },
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp,
                        "computedAt": computedAt,
                        "participantCount": row["participantCount"],
                        "responseCount": row["responseCount"],
                        "totalScore": row["totalScore"],
                        "totalPoints": row["totalPoints"],
                        "totalXp": row["totalXp"],
                        # TODO: set this value correctly for rolling updates in production code
                        # (cannot be computed for past learning analytics -> therefore set to invalid value)
                        "totalElementsAvailable": -1,
                        "course": {"connect": {"id": row["courseId"]}},
                    },
                    "update": {},
                },
            )

    # create or update course-wide analytics entries (unique per course for the COURSE analytics type)
    elif analytics_type == "COURSE":
        for _, row in df_analytics.iterrows():
            course = db.course.find_unique_or_raise(
                where={"id": row["courseId"]},
                include={
                    "practiceQuizzes": {
                        "include": {
                            "stacks": {
                                "include": {"elements": True},
                            }
                        }
                    },
                    "microLearnings": {
                        "include": {
                            "stacks": {
                                "include": {"elements": True},
                            }
                        }
                    },
                },
            )
            course = dict(course)

            # add the number of elements in all practice quizzes and microlearnings together
            totalElementsAvailable = 0
            for practice_quiz in course["practiceQuizzes"]:
                pq_dict = dict(practice_quiz)
                for stack in pq_dict["stacks"]:
                    stack_dict = dict(stack)
                    totalElementsAvailable += len(stack_dict["elements"])
            for microlearning in course["microLearnings"]:
                ml_dict = dict(microlearning)
                for stack in ml_dict["stacks"]:
                    stack_dict = dict(stack)
                    totalElementsAvailable += len(stack_dict["elements"])

            db.aggregatedanalytics.upsert(
                where={
                    "type_courseId_timestamp": {
                        "type": analytics_type,
                        "courseId": row["courseId"],
                        "timestamp": timestamp,
                    }
                },
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp,
                        "computedAt": computedAt,
                        "participantCount": row["participantCount"],
                        "responseCount": row["responseCount"],
                        "totalScore": row["totalScore"],
                        "totalPoints": row["totalPoints"],
                        "totalXp": row["totalXp"],
                        "totalElementsAvailable": totalElementsAvailable,
                        "course": {"connect": {"id": row["courseId"]}},
                    },
                    "update": {
                        "computedAt": computedAt,
                        "participantCount": row["participantCount"],
                        "responseCount": row["responseCount"],
                        "totalScore": row["totalScore"],
                        "totalPoints": row["totalPoints"],
                        "totalXp": row["totalXp"],
                        "totalElementsAvailable": totalElementsAvailable,
                    },
                },
            )

    else:
        raise ValueError("Unknown analytics type: {}".format(analytics_type))
5 changes: 5 additions & 0 deletions apps/analytics/src/modules/participant_analytics/__init__.py
@@ -0,0 +1,5 @@
from .compute_correctness import compute_correctness
from .get_participant_responses import get_participant_responses
from .aggregate_analytics import aggregate_analytics
from .save_participant_analytics import save_participant_analytics
from .compute_participant_course_analytics import compute_participant_course_analytics