Skip to content

Commit

Permalink
fix: split datasets and their sql
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian2012 committed Sep 20, 2024
1 parent f331026 commit 36dca8b
Show file tree
Hide file tree
Showing 7 changed files with 273 additions and 307 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ query_context:
expressionType: SQL
label: Graded
sqlExpression: |-
{{translate_column_bool('graded')}}
{% raw %}{{translate_column_bool('graded')}}{% endraw %}
custom_form_data: {}
custom_params: {}
extras:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,154 +395,7 @@ offset: 0
params: null
schema: null
sql: |
-- Per-learner problem responses joined with block metadata, learner PII and
-- course-wide attempt aggregates. The org / course_name filter blocks below
-- are only emitted when the corresponding filter_values list is non-empty.
with
-- One row per response in xapi.responses that has a first_success_at value.
successful_responses as (
select
org, course_key, problem_id, actor_id::String as actor_id, first_success_at
from xapi.responses
where
isNotNull(first_success_at)
{% if filter_values("org") != [] %}
and org in {{ filter_values("org") | where_in }}
{% endif %}
{% if filter_values("course_name") != [] %}
and course_key in (
select course_key
from event_sink.course_names
where course_name in {{ filter_values("course_name") | where_in }}
)
{% endif %}
),
-- Latest last_attempt_at per (org, course_key, problem_id, actor_id) for
-- actors that do not appear in successful_responses.
unsuccessful_responses as (
select
org,
course_key,
problem_id,
actor_id::String as actor_id,
max(last_attempt_at) as last_attempt_at
from xapi.responses
where
-- NOTE(review): the subquery is not correlated on problem_id or course_key,
-- so an actor with a success on any problem is excluded for every problem.
-- Confirm that this is intended.
actor_id not in (select distinct actor_id from successful_responses)
{% if filter_values("org") != [] %}
and org in {{ filter_values("org") | where_in }}
{% endif %}
{% if filter_values("course_name") != [] %}
and course_key in (
select course_key
from event_sink.course_names
where course_name in {{ filter_values("course_name") | where_in }}
)
{% endif %}
group by org, course_key, problem_id, actor_id
),
-- Both sets stacked, with their timestamp exposed under the common name
-- emission_time so it can be used as a join key below.
responses as (
select org, course_key, problem_id, actor_id, first_success_at as emission_time
from successful_responses
union all
select org, course_key, problem_id, actor_id, last_attempt_at as emission_time
from unsuccessful_responses
),
-- Problem events whose (org, course_key, problem_id, actor_id, emission_time)
-- matches a row selected in responses.
full_responses as (
select
events.emission_time as emission_time,
events.org as org,
events.course_key as course_key,
events.problem_id as problem_id,
events.object_id as object_id,
events.actor_id as actor_id,
events.responses as responses,
events.success as success,
events.attempts as attempts,
events.interaction_type as interaction_type
from xapi.problem_events events
join responses using (org, course_key, problem_id, actor_id, emission_time)
),
-- Per (org, course_key): average attempts on successful rows, average attempts
-- on unsuccessful rows, and the fraction of rows that are successful.
coursewide_attempts as (
select
org,
course_key,
avg(case when success then attempts else null end) as avg_correct_attempts,
avg(
case when not success then attempts else null end
) as avg_incorrect_attempts,
sum(case when success then 1 else 0 end)::float
/ count(*) as coursewide_percent_correct
from full_responses
group by org, course_key
)
select
full_responses.emission_time as emission_time,
full_responses.org as org,
full_responses.course_key as course_key,
blocks.course_name as course_name,
blocks.course_run as course_run,
full_responses.problem_id as problem_id,
blocks.block_name as problem_name,
blocks.display_name_with_location as problem_name_with_location,
blocks.course_order as course_order,
-- HTML anchor built from the event's object_id and the block name.
concat(
'<a href="',
full_responses.object_id,
'" target="_blank">',
blocks.block_name,
'</a>'
) as problem_link,
full_responses.actor_id as actor_id,
full_responses.responses as responses,
full_responses.success as success,
full_responses.attempts as attempts,
full_responses.interaction_type as interaction_type,
blocks.graded as graded,
users.username as username,
users.email as email,
users.name as name,
-- Aggregated course-wide values
coursewide_attempts.avg_correct_attempts as avg_correct_attempts_coursewide,
coursewide_attempts.avg_incorrect_attempts as avg_incorrect_attempts_coursewide,
coursewide_attempts.coursewide_percent_correct as coursewide_percent_correct,
-- Learner-specific values
(
case when full_responses.success then full_responses.attempts else null end
) as correct_attempts_by_learner,
(
case when not full_responses.success then full_responses.attempts else null end
) as incorrect_attempts_by_learner,
-- Selected Learner % Correct and Incorrect
-- Both are ratios of row counts within the (actor_id, problem_id) partition;
-- no scaling by 100 is applied here.
(
sum(case when full_responses.success then 1 else 0 end) over (
partition by full_responses.actor_id, full_responses.problem_id
) / count(*) over (
partition by full_responses.actor_id, full_responses.problem_id
)
) as selected_learner_percent_correct,
(
sum(case when not full_responses.success then 1 else 0 end) over (
partition by full_responses.actor_id, full_responses.problem_id
) / count(*) over (
partition by full_responses.actor_id, full_responses.problem_id
)
) as selected_learner_percent_incorrect
from full_responses
-- Inner join: responses without a matching block row are dropped.
join
reporting.dim_course_blocks blocks
on (
full_responses.course_key = blocks.course_key
and full_responses.problem_id = blocks.block_id
)
-- Outer join: responses are kept even when no user_pii row matches.
left outer join
event_sink.user_pii users
on full_responses.actor_id = users.external_user_id::String
join
coursewide_attempts
on full_responses.org = coursewide_attempts.org
and full_responses.course_key = coursewide_attempts.course_key
;
{% filter indent(width=2) %} {% include 'openedx-assets/queries/at_risk_problem_coursewide_avg.sql' %} {% endfilter %}
table_name: at_risk_coursewide_avg
template_params: null
uuid: a36a7904-c8b3-43ec-ad91-335fcedb8a27
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,18 +248,7 @@ offset: 0
params: null
schema: null
sql: |
-- Every fact_student_status row, extended with the number of distinct actors
-- that produced navigation events in the same course during the last 7 days
-- (0 when the course has no such events).
with
    weekly_active as (
        select
            course_key,
            COUNT(DISTINCT actor_id) as active_last_7_days
        from `reporting`.`navigation_events`
        where emission_time >= NOW() - INTERVAL 7 DAY
        group by course_key
    )
select
    student_status.*,
    COALESCE(weekly_active.active_last_7_days, 0) as active_within_last_7_days
from `reporting`.`fact_student_status` student_status
left join weekly_active
    on student_status.course_key = weekly_active.course_key
;
{% filter indent(width=2) %} {% include 'openedx-assets/queries/active_last_7_days.sql' %} {% endfilter %}
table_name: fact_student_status_plus_7_days
template_params: null
uuid: f25fdc4e-ce79-4f4c-9fc6-3dfe91690493
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -430,152 +430,7 @@ offset: 0
params: null
schema: null
sql: |
-- Per-learner problem responses joined with block metadata, learner PII and
-- course-wide attempt aggregates. The org / course_name filter blocks below
-- are only emitted when the corresponding filter_values list is non-empty.
with
-- One row per response in xapi.responses that has a first_success_at value.
successful_responses as (
select
org, course_key, problem_id, actor_id::String as actor_id, first_success_at
from xapi.responses
where
isNotNull(first_success_at)
{% if filter_values("org") != [] %}
and org in {{ filter_values("org") | where_in }}
{% endif %}
{% if filter_values("course_name") != [] %}
and course_key in (
select course_key
from event_sink.course_names
where course_name in {{ filter_values("course_name") | where_in }}
)
{% endif %}
),
-- Latest last_attempt_at per (org, course_key, problem_id, actor_id) for
-- actors that do not appear in successful_responses.
unsuccessful_responses as (
select
org,
course_key,
problem_id,
actor_id::String as actor_id,
max(last_attempt_at) as last_attempt_at
from xapi.responses
where
-- NOTE(review): the subquery is not correlated on problem_id or course_key,
-- so an actor with a success on any problem is excluded for every problem.
-- Confirm that this is intended.
actor_id not in (select distinct actor_id from successful_responses)
{% if filter_values("org") != [] %}
and org in {{ filter_values("org") | where_in }}
{% endif %}
{% if filter_values("course_name") != [] %}
and course_key in (
select course_key
from event_sink.course_names
where course_name in {{ filter_values("course_name") | where_in }}
)
{% endif %}
group by org, course_key, problem_id, actor_id
),
-- Both sets stacked, with their timestamp exposed under the common name
-- emission_time so it can be used as a join key below.
responses as (
select org, course_key, problem_id, actor_id, first_success_at as emission_time
from successful_responses
union all
select org, course_key, problem_id, actor_id, last_attempt_at as emission_time
from unsuccessful_responses
),
-- Problem events whose (org, course_key, problem_id, actor_id, emission_time)
-- matches a row selected in responses.
full_responses as (
select
events.emission_time as emission_time,
events.org as org,
events.course_key as course_key,
events.problem_id as problem_id,
events.object_id as object_id,
events.actor_id as actor_id,
events.responses as responses,
events.success as success,
events.attempts as attempts,
events.interaction_type as interaction_type
from xapi.problem_events events
join responses using (org, course_key, problem_id, actor_id, emission_time)
),
-- Per (org, course_key): average attempts on successful rows, average attempts
-- on unsuccessful rows, and the fraction of rows that are successful.
coursewide_attempts as (
select
org,
course_key,
avg(case when success then attempts else null end) as avg_correct_attempts,
avg(
case when not success then attempts else null end
) as avg_incorrect_attempts,
sum(case when success then 1 else 0 end)::float
/ count(*) as coursewide_percent_correct
from full_responses
group by org, course_key
)
select
full_responses.emission_time as emission_time,
full_responses.org as org,
full_responses.course_key as course_key,
blocks.course_name as course_name,
blocks.course_run as course_run,
full_responses.problem_id as problem_id,
blocks.block_name as problem_name,
blocks.display_name_with_location as problem_name_with_location,
blocks.course_order as course_order,
-- HTML anchor built from the event's object_id and the block name.
concat(
'<a href="',
full_responses.object_id,
'" target="_blank">',
blocks.block_name,
'</a>'
) as problem_link,
full_responses.actor_id as actor_id,
full_responses.responses as responses,
full_responses.success as success,
full_responses.attempts as attempts,
full_responses.interaction_type as interaction_type,
blocks.graded as graded,
users.username as username,
users.email as email,
users.name as name,
-- Aggregated values from the coursewide_attempts CTE
coursewide_attempts.avg_correct_attempts as avg_correct_attempts_coursewide,
coursewide_attempts.avg_incorrect_attempts as avg_incorrect_attempts_coursewide,
coursewide_attempts.coursewide_percent_correct as coursewide_percent_correct,
-- Learner-specific values: the attempts count is surfaced in the column that
-- matches the row's success flag and is null in the other one.
(
case when full_responses.success then full_responses.attempts else null end
) as correct_attempts_by_learner,
(
case when not full_responses.success then full_responses.attempts else null end
) as incorrect_attempts_by_learner,
-- Ratios of successful / unsuccessful row counts within each
-- (actor_id, problem_id) partition; no scaling by 100 is applied here.
(
sum(case when full_responses.success then 1 else 0 end) over (
partition by full_responses.actor_id, full_responses.problem_id
) / count(*) over (
partition by full_responses.actor_id, full_responses.problem_id
)
) as selected_learner_percent_correct,
(
sum(case when not full_responses.success then 1 else 0 end) over (
partition by full_responses.actor_id, full_responses.problem_id
) / count(*) over (
partition by full_responses.actor_id, full_responses.problem_id
)
) as selected_learner_percent_incorrect
from full_responses
-- Inner join: responses without a matching block row are dropped.
join
reporting.dim_course_blocks blocks
on (
full_responses.course_key = blocks.course_key
and full_responses.problem_id = blocks.block_id
)
-- Outer join: responses are kept even when no user_pii row matches.
left outer join
event_sink.user_pii users
on full_responses.actor_id = users.external_user_id::String
join
coursewide_attempts
on full_responses.org = coursewide_attempts.org
and full_responses.course_key = coursewide_attempts.course_key
{% filter indent(width=2) %} {% include 'openedx-assets/queries/problem_coursewide_avg.sql' %} {% endfilter %}
table_name: problem_coursewide_avg
template_params: null
uuid: b820067f-8f54-48d4-9b61-532e8f9ae6fe
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Every fact_student_status row, extended with the number of distinct actors
-- that produced navigation events in the same course during the last 7 days.
-- Both tables are read from the database named by the
-- DBT_PROFILE_TARGET_DATABASE template variable.
with
    -- Distinct active actors per course over the trailing 7-day window.
    recent_activity as (
        select course_key, COUNT(DISTINCT actor_id) as active_last_7_days
        from {{DBT_PROFILE_TARGET_DATABASE}}.`navigation_events`
        where emission_time >= NOW() - INTERVAL 7 DAY
        group by course_key
    )

-- Courses with no recent navigation events report 0 instead of a missing value.
select fss.*, COALESCE(ra.active_last_7_days, 0) as active_within_last_7_days
-- The database and table identifiers must be separated by a dot; without it
-- the rendered statement fuses the database name with the quoted table name.
from {{DBT_PROFILE_TARGET_DATABASE}}.`fact_student_status` fss
left join recent_activity ra on fss.course_key = ra.course_key
Loading

0 comments on commit 36dca8b

Please sign in to comment.