Skip to content

Commit

Permalink
Merge pull request #34979 from dimagi/mjr/enterprise_form_limits
Browse files Browse the repository at this point in the history
Added form limits for enterprise form reports
  • Loading branch information
mjriley authored Aug 20, 2024
2 parents c7b731d + a11738e commit 37c0956
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 10 deletions.
55 changes: 46 additions & 9 deletions corehq/apps/enterprise/enterprise.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

from django.utils.translation import gettext as _
from django.utils.translation import gettext_lazy
from django.conf import settings

from memoized import memoized

from couchforms.analytics import get_last_form_submission_received
from dimagi.utils.dates import DateSpan

from corehq.apps.enterprise.exceptions import EnterpriseReportError
from corehq.apps.enterprise.exceptions import EnterpriseReportError, TooMuchRequestedDataError
from corehq.apps.enterprise.iterators import raise_after_max_elements
from corehq.apps.accounting.models import BillingAccount
from corehq.apps.accounting.utils import get_default_domain_url
from corehq.apps.app_manager.dbaccessors import get_brief_apps_in_domain
Expand Down Expand Up @@ -209,6 +211,8 @@ def total_for_domain(self, domain_obj):

class EnterpriseFormReport(EnterpriseReport):
title = _('Mobile Form Submissions')
MAXIMUM_USERS_PER_DOMAIN = getattr(settings, 'ENTERPRISE_REPORT_DOMAIN_USER_LIMIT', 20_000)
MAXIMUM_ROWS_PER_REQUEST = getattr(settings, 'ENTERPRISE_REPORT_ROW_LIMIT', 1_000_000)

def __init__(self, account, couch_user, start_date=None, end_date=None, num_days=30, include_form_id=False):
super().__init__(account, couch_user)
Expand Down Expand Up @@ -245,23 +249,56 @@ def headers(self):
def _query(self, domain_name):
    """Build the form query for one domain's mobile users within the report's date span.

    :param domain_name: name of the domain whose form submissions are queried
    :returns: a ``form_es.FormES`` query filtered by domain, submission time and user ids
    :raises TooMuchRequestedDataError: if the domain has more than
        ``MAXIMUM_USERS_PER_DOMAIN`` mobile users
    """
    # Ask for one id more than the limit so "over the limit" can be told apart
    # from "exactly at the limit" without fetching every user in the domain.
    user_ids = (
        UserES()
        .domain(domain_name)
        .mobile_users()
        .show_inactive()
        .size(self.MAXIMUM_USERS_PER_DOMAIN + 1)
        .values_list('_id', flat=True)
    )

    # The limit has to be checked against the ids themselves. The value returned by
    # form_es.user_id() is handed to .filter() below, so len() of that value does not
    # measure how many users were found.
    # NOTE(review): assumes values_list(..., flat=True) returns a sized sequence -- confirm.
    if len(user_ids) > self.MAXIMUM_USERS_PER_DOMAIN:
        raise TooMuchRequestedDataError(
            _('Domain {name} has too many users. Maximum allowed is: {amount}')
            .format(name=domain_name, amount=self.MAXIMUM_USERS_PER_DOMAIN)
        )

    time_filter = form_es.submitted
    users_filter = form_es.user_id(user_ids)

    query = (
        form_es.FormES()
        .domain(domain_name)
        .filter(time_filter(gte=self.datespan.startdate, lt=self.datespan.enddate_adjusted))
        .filter(users_filter)
    )
    return query

def hits(self, domain_name):
    """Iterate over the form hits for a domain, enforcing the per-request row limit.

    The domain query is scrolled and wrapped in ``raise_after_max_elements`` so
    that iteration fails once more than ``MAXIMUM_ROWS_PER_REQUEST`` hits have
    been produced.

    :param domain_name: name of the domain whose form hits are returned
    :returns: an iterator over the hits of ``self._query(domain_name)``
    :raises TooMuchRequestedDataError: while iterating, once the row limit is
        exceeded; ``_query`` may also raise it when this method is called
    """
    # Only the limited iterator is returned. An earlier
    # `return self._query(domain_name).run().hits` ahead of this statement made
    # the row limit unreachable.
    return raise_after_max_elements(
        self._query(domain_name).scroll(),
        self.MAXIMUM_ROWS_PER_REQUEST,
        self._generate_data_error()
    )

def _generate_data_error(self):
    """Create (without raising) the error used when the report exceeds the row limit.

    :returns: a ``TooMuchRequestedDataError`` whose message names the account
        and ``MAXIMUM_ROWS_PER_REQUEST``
    """
    template = _(
        '{name} contains too many rows. Maximum allowed is: {amount}. Please narrow the date range'
        ' to fetch a smaller amount of data'
    )
    message = template.format(
        name=self.account.name,
        amount=self.MAXIMUM_ROWS_PER_REQUEST,
    )
    return TooMuchRequestedDataError(message)

@property
def rows(self):
    """Collect the rows of every domain, failing once the combined total is too large.

    :returns: a list holding the rows of all domains, in ``self.domains()`` order
    :raises TooMuchRequestedDataError: if the combined row count exceeds
        ``MAXIMUM_ROWS_PER_REQUEST``
    """
    collected = []
    for domain_obj in self.domains():
        domain_rows = self.rows_for_domain(domain_obj)
        # Check before appending so an over-limit domain is never merged in.
        if len(collected) + len(domain_rows) > self.MAXIMUM_ROWS_PER_REQUEST:
            raise self._generate_data_error()
        collected.extend(domain_rows)
    return collected

def rows_for_domain(self, domain_obj):
apps = get_brief_apps_in_domain(domain_obj.name)
apps = {a.id: a.name for a in apps}
rows = []

for hit in self.hits(domain_obj.name):
if hit['form'].get('#type') == 'system':
continue
Expand Down
4 changes: 4 additions & 0 deletions corehq/apps/enterprise/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
class EnterpriseReportError(Exception):
    """Error type used by the enterprise report code."""


class TooMuchRequestedDataError(Exception):
    """Raised when a request would return more data than the configured limits allow."""
7 changes: 7 additions & 0 deletions corehq/apps/enterprise/iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
def raise_after_max_elements(it, max_elements, exception=None):
    """Yield the elements of ``it``, raising if it holds more than ``max_elements``.

    Up to ``max_elements`` elements are yielded unchanged. The error is raised
    when a further element is pulled from ``it``, so an iterable with exactly
    ``max_elements`` elements is consumed without error.

    :param it: the iterable to pass through
    :param max_elements: the largest number of elements that may be yielded
    :param exception: the exception to raise when the limit is exceeded;
        defaults to ``Exception('Too Many Elements')``
    """
    remaining = max_elements
    for ele in it:
        if remaining <= 0:
            # The budget is spent and the source still had another element.
            raise exception or Exception('Too Many Elements')
        remaining -= 1
        yield ele
15 changes: 14 additions & 1 deletion corehq/apps/enterprise/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
EnterpriseMobileWorkerSettings,
EnterprisePermissions,
)
from corehq.apps.enterprise.exceptions import TooMuchRequestedDataError
from corehq.apps.hqwebapp.tasks import send_html_email_async
from corehq.apps.users.models import DeactivateMobileWorkerTrigger
from corehq.const import ONE_DAY
Expand All @@ -32,7 +33,19 @@ def email_enterprise_report(domain: str, slug, couch_user):
csv_file = io.StringIO()
writer = csv.writer(csv_file)
writer.writerow(report.headers)
writer.writerows(report.rows)
try:
writer.writerows(report.rows)
except TooMuchRequestedDataError as e:
subject = _("Enterprise Dashboard Error: {}").format(report.title)
body = str(e)
send_html_email_async(
subject,
couch_user.get_email(),
body,
domain=domain,
use_domain_gateway=True,
)
return

# Store file in redis
hash_id = uuid.uuid4().hex
Expand Down
19 changes: 19 additions & 0 deletions corehq/apps/enterprise/tests/test_iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from django.test import SimpleTestCase

from corehq.apps.enterprise.iterators import raise_after_max_elements


class TestRaiseAfterMaxElements(SimpleTestCase):
    """Behaviour of ``raise_after_max_elements`` at and beyond the element limit."""

    def test_iterating_beyond_max_items_will_raise_the_default_exception(self):
        # Three elements against a limit of two: consuming the iterator must fail.
        limited = raise_after_max_elements([1, 2, 3], 2)
        with self.assertRaisesMessage(Exception, 'Too Many Elements'):
            list(limited)

    def test_iterating_beyond_max_items_will_raise_provided_exception(self):
        custom_error = Exception('Test Message')
        limited = raise_after_max_elements([1, 2, 3], 2, custom_error)
        with self.assertRaisesMessage(Exception, 'Test Message'):
            list(limited)

    def test_can_iterate_through_all_elements_with_no_exception(self):
        # A limit equal to the number of elements is not an overflow.
        elements = [1, 2, 3]
        limited = raise_after_max_elements(elements, len(elements))
        self.assertEqual(list(limited), [1, 2, 3])

0 comments on commit 37c0956

Please sign in to comment.