Skip to content

Commit

Permalink
enhance: add first and last correctness counts to course-wide analytics and extend database
Browse files Browse the repository at this point in the history
  • Loading branch information
sjschlapbach committed Aug 27, 2024
1 parent ca56973 commit c546071
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 43 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd


def aggregate_analytics(df_details, verbose=False):
def aggregate_analytics(df_details, df_course_responses=None):
# Aggregate the question response details for the participant and course level
df_analytics_counts = (
df_details.groupby(["participantId", "courseId"])
Expand Down Expand Up @@ -93,9 +93,53 @@ def aggregate_analytics(df_details, verbose=False):
}
)

df_course_analytics = None
if df_course_responses is not None:
# Count entries where firstResponseCorrectness is 'CORRECT', 'WRONG' and lastResponseCorrectness is 'CORRECT', 'WRONG' into separate columns - grouped by participantId and courseId
df_course_analytics = (
df_course_responses.groupby(["participantId", "courseId"])
.agg(
{
"firstResponseCorrectness": [
("correct", lambda x: (x == "CORRECT").sum()),
("wrong", lambda x: (x == "WRONG").sum()),
],
"lastResponseCorrectness": [
("correct", lambda x: (x == "CORRECT").sum()),
("wrong", lambda x: (x == "WRONG").sum()),
],
}
)
.reset_index()
)
df_course_analytics.columns = df_course_analytics.columns.map(
"_".join
).str.strip("_")
df_course_analytics = df_course_analytics.rename(
columns={
"firstResponseCorrectness_correct": "firstCorrectCount",
"firstResponseCorrectness_wrong": "firstWrongCount",
"lastResponseCorrectness_correct": "lastCorrectCount",
"lastResponseCorrectness_wrong": "lastWrongCount",
}
)

# Combine the analytics counts and correctness dataframes based on the unique participantId and courseId combinations
df_analytics = pd.merge(
df_analytics_counts, df_analytics_correctness, on=["participantId", "courseId"]
)
if df_course_analytics is None:
df_analytics = pd.merge(
df_analytics_counts,
df_analytics_correctness,
on=["participantId", "courseId"],
)
else:
df_analytics = pd.merge(
df_analytics_counts,
pd.merge(
df_analytics_correctness,
df_course_analytics,
on=["participantId", "courseId"],
),
on=["participantId", "courseId"],
)

return df_analytics
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ def compute_participant_analytics(
return

# Compute participant analytics (score/xp counts and correctness statistics)
df_analytics = aggregate_analytics(df_details, verbose)
if verbose:
df_analytics.head()
df_analytics = aggregate_analytics(df_details)

# Save the aggregated analytics into the database
save_participant_analytics(db, df_analytics, timestamp, analytics_type)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def compute_participant_course_analytics(db, df_courses, verbose=False):
"lte": course_end_date,
}
},
}
},
"questionResponses": True,
}
}
},
Expand All @@ -45,14 +46,32 @@ def compute_participant_course_analytics(db, df_courses, verbose=False):
participations_dict,
)
)
responses_dict = list(
map(lambda x: x["participant"]["questionResponses"], participations_dict)
)

details = [item for sublist in details_dict for item in sublist]
if len(details) == 0:
responses = [item for sublist in responses_dict for item in sublist]
if len(details) == 0 or len(responses) == 0:
courses_without_responses += 1
print("No detail responses found for course {}".format(course_id))
print(
"No detail responses or response entries found for course {}".format(
course_id
)
)
continue

# Create pandas dataframe containing all question response details
# Create pandas dataframe containing all question responses and details
df_details = pd.DataFrame(details)
df_responses = pd.DataFrame(responses)
df_responses = df_responses[
[
"courseId",
"participantId",
"firstResponseCorrectness",
"lastResponseCorrectness",
]
]

# Add the course start and end dates to the dataframe
df_details["course_start_date"] = course_start_date
Expand All @@ -71,9 +90,7 @@ def compute_participant_course_analytics(db, df_courses, verbose=False):
continue

# Compute participant analytics (score/xp counts and correctness statistics)
df_analytics = aggregate_analytics(df_details, verbose)
if verbose:
df_analytics.head()
df_analytics = aggregate_analytics(df_details, df_responses)

# Save the aggregated analytics into the database
end_curr_date = datetime.now().strftime("%Y-%m-%d") + "T23:59:59.999Z"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,30 +1,92 @@
from datetime import datetime


def save_participant_analytics(db, df_analytics, timestamp, analytics_type="DAILY"):
    """Upsert one ParticipantAnalytics entry per row of ``df_analytics``.

    Args:
        db: Prisma client exposing ``participantanalytics.upsert``.
        df_analytics: DataFrame with one row per participant/course combination,
            containing the aggregated count and correctness columns.
        timestamp: ISO timestamp string identifying the analytics window
            (ignored for ``analytics_type == "COURSE"``).
        analytics_type: "DAILY", "WEEKLY" or "MONTHLY" (time-windowed entries,
            created once and never updated) or "COURSE" (a single course-wide
            entry per participant/course, created or overwritten in place).

    Raises:
        ValueError: if ``analytics_type`` is not one of the supported values.
    """
    # Normalized computation date (midnight of the current day, UTC suffix).
    computedAt = datetime.now().strftime("%Y-%m-%d") + "T00:00:00.000Z"

    # Create daily / weekly / monthly analytics entries for all participants
    if analytics_type in ["DAILY", "WEEKLY", "MONTHLY"]:
        for _, row in df_analytics.iterrows():
            db.participantanalytics.upsert(
                where=_unique_key(analytics_type, row, timestamp),
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp,
                        "computedAt": computedAt,
                        **_base_metrics(row),
                        **_relations(row),
                    },
                    # Time-windowed entries are immutable once created.
                    "update": {},
                },
            )

    # Create or update course-wide analytics entries (should be unique for
    # participant / course combination)
    elif analytics_type == "COURSE":
        # Constant timestamp so the compound unique key collapses to one
        # entry per participant/course combination.
        timestamp_const = "1970-01-01T00:00:00.000Z"
        for _, row in df_analytics.iterrows():
            metrics = {**_base_metrics(row), **_course_metrics(row)}
            db.participantanalytics.upsert(
                where=_unique_key(analytics_type, row, timestamp_const),
                data={
                    "create": {
                        "type": analytics_type,
                        "timestamp": timestamp_const,
                        "computedAt": computedAt,
                        **metrics,
                        **_relations(row),
                    },
                    # Course-wide entries are recomputed and overwritten.
                    "update": {
                        "timestamp": timestamp_const,
                        "computedAt": computedAt,
                        **metrics,
                    },
                },
            )

    else:
        raise ValueError("Unknown analytics type: {}".format(analytics_type))


def _unique_key(analytics_type, row, timestamp):
    # Compound unique key matching @@unique([type, courseId, participantId, timestamp]).
    return {
        "type_courseId_participantId_timestamp": {
            "type": analytics_type,
            "courseId": row["courseId"],
            "participantId": row["participantId"],
            "timestamp": timestamp,
        }
    }


def _base_metrics(row):
    # Count and correctness metrics shared by all analytics types.
    return {
        "trialsCount": row["trialsCount"],
        "responseCount": row["responseCount"],
        "totalScore": row["totalScore"],
        "totalPoints": row["totalPoints"],
        "totalXp": row["totalXp"],
        "meanCorrectCount": row["meanCorrectCount"],
        # NOTE: the dataframe column is named "meanPartialCount" while the
        # database field is "meanPartialCorrectCount" — mapping is intentional.
        "meanPartialCorrectCount": row["meanPartialCount"],
        "meanWrongCount": row["meanWrongCount"],
    }


def _course_metrics(row):
    # First/last response correctness counts — only computed course-wide.
    return {
        "firstCorrectCount": row["firstCorrectCount"],
        "firstWrongCount": row["firstWrongCount"],
        "lastCorrectCount": row["lastCorrectCount"],
        "lastWrongCount": row["lastWrongCount"],
    }


def _relations(row):
    # Prisma connect clauses linking the entry to participant and course.
    return {
        "participant": {"connect": {"id": row["participantId"]}},
        "course": {"connect": {"id": row["courseId"]}},
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
Warnings:
- Added the required column `updatedAt` to the `AggregatedAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `AggregatedCompetencyAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `AggregatedCourseAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `CompetencyAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `ParticipantAnalytics` table without a default value. This is not possible if the table is not empty.
- Added the required column `updatedAt` to the `ParticipantCourseAnalytics` table without a default value. This is not possible if the table is not empty.
*/

-- Adds audit timestamp columns to all analytics tables:
--   createdAt  - row creation time, defaulted by the database
--   updatedAt  - no database default; presumably maintained client-side via
--                Prisma's @updatedAt (hence the warnings above) -- confirm
--   computedAt - (ParticipantAnalytics / AggregatedAnalytics only) the DATE
--                on which the analytics values were computed

-- AlterTable
ALTER TABLE "AggregatedAnalytics" ADD COLUMN "computedAt" DATE NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "AggregatedCompetencyAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "AggregatedCourseAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "CompetencyAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "ParticipantAnalytics" ADD COLUMN "computedAt" DATE NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;

-- AlterTable
ALTER TABLE "ParticipantCourseAnalytics" ADD COLUMN "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
ADD COLUMN "updatedAt" TIMESTAMP(3) NOT NULL;
22 changes: 21 additions & 1 deletion packages/prisma/src/prisma/schema/analytics.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ model ParticipantAnalytics {
id Int @id @default(autoincrement())
type AnalyticsType
timestamp DateTime @db.Date
timestamp DateTime @db.Date
computedAt DateTime @db.Date @default(now())
// unsolvedQuestionsCount = AggregatedAnalytics.totalElementsAvailable - responseCount
trialsCount Int // total number of questions attempted
Expand Down Expand Up @@ -47,6 +48,9 @@ model ParticipantAnalytics {
course Course @relation(fields: [courseId], references: [id], onDelete: Cascade, onUpdate: Cascade)
courseId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([type, courseId, participantId, timestamp])
}

Expand All @@ -57,6 +61,7 @@ model AggregatedAnalytics {
// all quantities are defined as the values at the end of the selected timeframe
timestamp DateTime @db.Date
computedAt DateTime @db.Date @default(now())
responseCount Int
participantCount Int
totalScore Int
Expand All @@ -69,6 +74,9 @@ model AggregatedAnalytics {
course Course @relation(fields: [courseId], references: [id], onDelete: Cascade, onUpdate: Cascade)
courseId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([type, courseId, timestamp])
}

Expand All @@ -87,6 +95,9 @@ model CompetencyAnalytics {
participantAnalytics ParticipantAnalytics @relation(fields: [participantAnalyticsId], references: [id], onDelete: Cascade, onUpdate: Cascade)
participantAnalyticsId Int
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([competencyId, participantAnalyticsId])
}

Expand All @@ -105,6 +116,9 @@ model AggregatedCompetencyAnalytics {
aggregatedAnalytics AggregatedAnalytics @relation(fields: [aggregatedAnalyticsId], references: [id], onDelete: Cascade, onUpdate: Cascade)
aggregatedAnalyticsId Int
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([competencyId, aggregatedAnalyticsId])
}

Expand All @@ -123,6 +137,9 @@ model ParticipantCourseAnalytics {
participant Participant @relation(fields: [participantId], references: [id], onDelete: Cascade, onUpdate: Cascade)
participantId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@unique([courseId, participantId])
}

Expand All @@ -140,6 +157,9 @@ model AggregatedCourseAnalytics {
course Course @relation(fields: [courseId], references: [id], onDelete: Cascade, onUpdate: Cascade)
courseId String @db.Uuid
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
}

model CompetencyTree {
Expand Down

0 comments on commit c546071

Please sign in to comment.