Skip to content

Commit

Permalink
Add next iteration to collect enrollment data
Browse files Browse the repository at this point in the history
This PR provides alternate execution to create the same data as before

* See the module docstring in `figures.pipeline.enrollment_metrics_next` for details
* This PR contains a new module and tests to support the new module
  • Loading branch information
johnbaldwin committed Feb 25, 2022
1 parent 839b322 commit f6a177d
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 0 deletions.
68 changes: 68 additions & 0 deletions figures/pipeline/enrollment_metrics_next.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""This module updates Figures enrollment data and calculates aggregate progress
* It updates `EnrollmentData` and `LearnerCourseGradeMetrics` records
* It calculates course progress from EnrollmentData records
This generates the same metrics as the original enrollment_metrics modules,
but does it differently.
## How it differs from the previous version
This module improves on the existing enrollment metrics collection module,
`figures.pipeline.enrollment_metrics`
* It separates the activities to create and update Figures per-enrollment data
collected
* This separation lets Figures run metrics in distinct stages
* First, collect per-enrollment data
* Second, aggregate metrics based on collected data
* This provides a workflow that is easier to resume if interrupted
* This provides a workflow that is simpler to debug
* This simplifies and speeds up aggregate progress metrics, collapsing complex
code into a single Django queryset aggregation
* This update lays groundwork for further metrics improvements and enhancements
such as metrics on subsets of learners in a course or progress of subsets of
learners across courses
# Developer Notes
This module provides
"""
from django.db.models import Avg
from figures.course import Course
from figures.helpers import utc_yesterday
from figures.models import EnrollmentData
from figures.sites import UnlinkedCourseError


def update_enrollment_data_for_course(course_id):
    """Update Figures per-enrollment data for enrollments in the course.

    Checks for and creates new `LearnerCourseGradeMetrics` records and updates
    `EnrollmentData` records.

    Raises `UnlinkedCourseError` when no site can be found for the course.

    Returns a list of the results returned by `update_metrics`, one entry per
    enrollment that was active on the target date.
    """
    target_date = utc_yesterday()
    course = Course(course_id)
    if not course.site:
        raise UnlinkedCourseError('No site found for course "{}"'.format(course_id))

    # Only enrollments with updated student module records on the target date
    # need their metrics refreshed
    results = []
    for enrollment in course.enrollments_active_on_date(target_date):
        results.append(EnrollmentData.objects.update_metrics(course.site, enrollment))
    return results


def calculate_course_progress(course_id):
    """Return the average progress percentage for all enrollments in the course.

    Returns a dict with a single key, 'average_progress', holding a float.
    When the course has no `EnrollmentData` records the value is 0.0.
    """
    queryset = EnrollmentData.objects.filter(course_id=str(course_id))
    results = queryset.aggregate(average_progress=Avg('progress_percent'))

    # SQL AVG yields NULL (Python None) when there are no rows. Until progress
    # data is overhauled to distinguish "no data" explicitly, coerce to 0.0
    if results['average_progress'] is None:
        results['average_progress'] = 0.0
    return results
118 changes: 118 additions & 0 deletions tests/pipeline/test_enrollment_metrics_next.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Test next iteration to collect enrollment metrics
These tests exercise the module, `figures.pipeline.enrollment_metrics_next`.
See the module docstring for details.
"""
import pytest
from mock import patch

from django.contrib.sites.models import Site

from figures.compat import CourseEnrollment
from figures.pipeline.enrollment_metrics_next import (
update_enrollment_data_for_course,
calculate_course_progress,
)
from figures.sites import UnlinkedCourseError

from tests.factories import (
CourseEnrollmentFactory,
CourseOverviewFactory,
EnrollmentDataFactory,
)


@pytest.mark.django_db
class TestUpdateMetrics(object):
    """Tests `update_enrollment_data_for_course`

    Since `figures.models.EnrollmentDataManager.update_metrics` is tested in
    `tests/models/test_enrollment_data_update_metrics.py`, we can mock this
    method in our tests here.
    """

    @pytest.fixture(autouse=True)
    def setup(self, db, settings):
        # Fresh course and default site for every test method
        self.course_overview = CourseOverviewFactory()
        self.site = Site.objects.first()

    def test_course_has_no_enrollments(self, monkeypatch):
        """We have a new course with no enrollments
        """
        monkeypatch.setattr('figures.course.get_site_for_course', lambda val: self.site)
        result = update_enrollment_data_for_course(self.course_overview.id)
        assert result == []

    def test_course_has_enrollments_but_no_active_for_yesterday(self, monkeypatch):
        """We have enrollments, but none were active yesterday
        """
        monkeypatch.setattr('figures.course.get_site_for_course', lambda val: self.site)
        # Create the enrollments for side effect only; no activity is recorded
        # for them, so none should be picked up as active
        for _ in range(2):
            CourseEnrollmentFactory(course_id=self.course_overview.id)
        result = update_enrollment_data_for_course(self.course_overview.id)
        assert result == []

    def test_course_has_active_enrollments_for_yesterday(self):
        """We have enrollments who were active yesterday
        """
        expected_ce = [CourseEnrollmentFactory(course_id=self.course_overview.id)
                       for _ in range(2)]
        ce = CourseEnrollment.objects.filter(course_id=self.course_overview.id)

        def mock_update_metrics(site, enrollment):
            # Pass the enrollment straight through so the function's return
            # value can be compared against the created enrollments
            return enrollment

        with patch('figures.pipeline.enrollment_metrics_next.Course') as course_class:
            with patch('figures.pipeline.enrollment_metrics_next.EnrollmentData.objects') as edm:
                course_class.return_value.enrollments_active_on_date.return_value = ce
                course_class.return_value.site = self.site
                edm.update_metrics = mock_update_metrics
                result = update_enrollment_data_for_course(self.course_overview.id)
        assert set(result) == set(expected_ce)

    def test_course_is_unlinked(self, monkeypatch):
        """Function should raise `UnlinkedCourseError` if there's not a site match

        For Tahoe's multisite implementation, this can happen if the course's
        organization is not linked to a site

        For standalone sites, this should never happen

        To learn more, see the `figures.sites.get_site_for_course` function.
        """
        monkeypatch.setattr('figures.course.get_site_for_course', lambda val: None)
        with pytest.raises(UnlinkedCourseError) as excinfo:
            update_enrollment_data_for_course(self.course_overview.id)
        expected_msg = 'No site found for course "{}"'.format(str(self.course_overview.id))
        assert str(excinfo.value) == expected_msg


@pytest.mark.django_db
class TestCalculateProgress(object):
    """Tests `calculate_course_progress`
    """
    @pytest.fixture(autouse=True)
    def setup(self, db, settings):
        # Fresh course for every test method
        self.course_overview = CourseOverviewFactory()

    def test_calc_course_progress_empty(self):
        """The course doesn't have any EnrollmentData records

        The function should coerce SQL AVG's NULL result to 0.0
        """
        results = calculate_course_progress(self.course_overview.id)
        assert results['average_progress'] == pytest.approx(0.0)

    def test_calc_course_progress(self):
        """The course has EnrollmentData records
        """
        some_percentages = [0.0, 25.0, 50.0]
        expected_average = sum(some_percentages) / len(some_percentages)
        # Create the records for side effect only; the function under test
        # queries them through the ORM
        for pp in some_percentages:
            EnrollmentDataFactory(course_id=str(self.course_overview.id),
                                  progress_percent=pp)
        results = calculate_course_progress(self.course_overview.id)
        assert results['average_progress'] == pytest.approx(expected_average)

0 comments on commit f6a177d

Please sign in to comment.