-
-
Notifications
You must be signed in to change notification settings - Fork 218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize case queries in dump_domain_data #34010
base: master
Are you sure you want to change the base?
Changes from 5 commits
2f0e213
1a43fd4
ed47ec6
dd5a46e
6696b9b
885b626
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from datetime import datetime | ||
|
||
from django.test import TestCase | ||
|
||
from corehq.apps.dump_reload.sql.filters import CaseIDFilter | ||
from corehq.form_processor.models.cases import CaseTransaction | ||
from corehq.form_processor.tests.utils import create_case | ||
from corehq.sql_db.util import get_db_aliases_for_partitioned_query | ||
|
||
|
||
class TestCaseIDFilter(TestCase): | ||
""" | ||
Given this is used in the context of dumping all data associated with a domain, it is important | ||
that all cases for a a domain are included in this filter's get_ids method. | ||
""" | ||
|
||
def test_init_raises_exception_if_used_with_model_that_does_not_foreign_key_to_case(self): | ||
with self.assertRaises(ValueError): | ||
CaseIDFilter('form_processor.XFormInstance') | ||
|
||
def test_returns_case_ids_for_domain(self): | ||
create_case('test', case_id='abc123', save=True) | ||
filter = CaseIDFilter('form_processor.CaseTransaction') | ||
case_ids = list(filter.get_ids('test', self.db_alias)) | ||
self.assertEqual(case_ids, ['abc123']) | ||
|
||
def test_does_not_return_case_ids_from_other_domain(self): | ||
create_case('test', case_id='abc123', save=True) | ||
filter = CaseIDFilter('form_processor.CaseTransaction') | ||
case_ids = list(filter.get_ids('other', self.db_alias)) | ||
self.assertEqual(case_ids, []) | ||
|
||
def test_deleted_case_ids_are_included(self): | ||
create_case('test', case_id='abc123', save=True) | ||
create_case('test', case_id='def456', save=True, deleted=True) | ||
filter = CaseIDFilter('form_processor.CaseTransaction') | ||
case_ids = list(filter.get_ids('test', self.db_alias)) | ||
self.assertCountEqual(case_ids, ['abc123', 'def456']) | ||
|
||
def test_count_correctly_counts_all_objects_related_to_case_id(self): | ||
case1 = create_case('test', case_id='abc123', save=True) | ||
CaseTransaction.objects.partitioned_query(case1.case_id).create( | ||
case=case1, server_date=datetime.utcnow(), type=1 | ||
) | ||
filter = CaseIDFilter('form_processor.CaseTransaction') | ||
count = filter.count('test') | ||
self.assertEqual(count, 2) | ||
|
||
def test_count_includes_deleted_cases(self): | ||
create_case('test', case_id='abc123', save=True) | ||
create_case('test', case_id='def456', save=True, deleted=True) | ||
filter = CaseIDFilter('form_processor.CaseTransaction') | ||
count = filter.count('test') | ||
self.assertEqual(count, 2) | ||
|
||
@classmethod | ||
def setUpClass(cls): | ||
super().setUpClass() | ||
cls.db_alias = get_db_aliases_for_partitioned_query()[0] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,45 +5,82 @@ | |
|
||
|
||
class TestQuerysetToIterator(TestCase): | ||
@classmethod | ||
def setUpClass(cls): | ||
super().setUpClass() | ||
cls.users = [ | ||
User.objects.create_user(f'user{i}@example.com', last_name="Tenenbaum") | ||
for i in range(1, 11) | ||
] | ||
|
||
@classmethod | ||
def tearDownClass(cls): | ||
for user in cls.users: | ||
user.delete() | ||
super().tearDownClass() | ||
def test_correct_results_are_returned(self): | ||
query = User.objects.filter(last_name="Tenenbaum") | ||
|
||
results = list(queryset_to_iterator(query, User, limit=10)) | ||
|
||
self.assertEqual( | ||
[u.username for u in results], | ||
[u.username for u in self.users], | ||
) | ||
|
||
def test_results_returned_in_one_query_if_limit_is_greater_than_result_size(self): | ||
query = User.objects.filter(last_name="Tenenbaum") | ||
|
||
with self.assertNumQueries(1): | ||
# query 1: Users 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | ||
results = list(queryset_to_iterator(query, User, limit=11)) | ||
|
||
self.assertEqual(len(results), 10) | ||
|
||
def test_results_returned_in_two_queries_if_limit_is_equal_to_result_size(self): | ||
query = User.objects.filter(last_name="Tenenbaum") | ||
|
||
def test_queryset_to_iterator(self): | ||
with self.assertNumQueries(2): | ||
# query 1: Users 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | ||
# query 2: Check that there are no users past #10 | ||
results = list(queryset_to_iterator(query, User, limit=10)) | ||
|
||
self.assertEqual(len(results), 10) | ||
|
||
def test_results_return_in_three_queries_if_limit_is_less_than_or_equal_to_half_of_result_size(self): | ||
query = User.objects.filter(last_name="Tenenbaum") | ||
self.assertEqual(query.count(), 10) | ||
|
||
with self.assertNumQueries(4): | ||
with self.assertNumQueries(3): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esoergel given this behavior was defined in tests as well, I wanted to clarify if it was intentional. Basically, is there a concern that breaking the pagination loop when the # of docs returned in for a page is less than the limit set could lead to prematurely exiting pagination? This maps to the addition of:
in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (also I assumed you added this based on commit history, but I admittedly did not look at it very hard so if you don't have context, totally fine) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I think I added this - that does seem reasonable, not sure why I didn't do that in the first place. Reviewing this now, my initial thought was that the dataset can change while the query is executing, which could cause weirdness, but the limit is part of that last query, so the number of results returned should be a valid data point in determining whether there are any remaining. |
||
# query 1: Users 1, 2, 3, 4 | ||
# query 2: Users 5, 6, 7, 8 | ||
# query 3: Users 9, 10 | ||
# query 4: Check that there are no users past #10 | ||
all_users = list(queryset_to_iterator(query, User, limit=4)) | ||
results = list(queryset_to_iterator(query, User, limit=4)) | ||
|
||
self.assertEqual( | ||
[u.username for u in all_users], | ||
[u.username for u in self.users], | ||
) | ||
self.assertEqual(len(results), 10) | ||
|
||
def test_ordered_queryset(self): | ||
def test_ordered_queryset_raises_assertion_error_when_ignore_ordering_is_false(self): | ||
query = User.objects.filter(last_name="Tenenbaum").order_by('username') | ||
|
||
with self.assertRaises(AssertionError): | ||
# ignore_ordering defaults to False | ||
list(queryset_to_iterator(query, User, limit=4)) | ||
|
||
def test_ordered_queryset_ignored(self): | ||
def test_ordered_queryset_does_not_raise_assertion_error_when_ignore_ordering_is_true(self): | ||
query = User.objects.filter(last_name="Tenenbaum").order_by('username') | ||
all_users = list(queryset_to_iterator(query, User, limit=4, ignore_ordering=True)) | ||
# test succeeds is AssertionError is not raised | ||
list(queryset_to_iterator(query, User, limit=4, ignore_ordering=True)) | ||
|
||
def test_results_ordered_by_pagination_key_when_paginate_by_is_defined(self): | ||
query = User.objects.filter(last_name="Tenenbaum") | ||
|
||
results = list(queryset_to_iterator(query, User, limit=4, paginate_by={"username": "gt"})) | ||
|
||
self.assertEqual( | ||
[u.username for u in all_users], | ||
[u.username for u in self.users], | ||
) | ||
[u.username for u in results], | ||
['alice-user4@example.com', | ||
'alice-user8@example.com', | ||
'bob-user1@example.com', | ||
'bob-user5@example.com', | ||
'bob-user9@example.com', | ||
'jane-user3@example.com', | ||
'jane-user7@example.com', | ||
'john-user10@example.com', | ||
'john-user2@example.com', | ||
'john-user6@example.com']) | ||
|
||
@classmethod | ||
def setUpClass(cls): | ||
super().setUpClass() | ||
first_names = ['alice', 'bob', 'john', 'jane'] | ||
cls.users = [ | ||
User.objects.create_user(f'{first_names[i % 4]}-user{i}@example.com', last_name="Tenenbaum") | ||
for i in range(1, 11) | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does
db_alias
not have a default here while it does for some of the other filters?