From 2445577d78130aa498191f32014e23585d2b8a00 Mon Sep 17 00:00:00 2001 From: Alexander Kozlovsky Date: Thu, 4 Apr 2024 04:08:26 +0200 Subject: [PATCH] Fix slow database queries, by using partial index `idx_torrentstate__last_check__partial` --- .../components/metadata_store/db/store.py | 9 ++++---- .../tests/test_torrentchecker.py | 2 +- .../torrent_checker/torrent_checker.py | 21 ++++++++++++------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/tribler/core/components/metadata_store/db/store.py b/src/tribler/core/components/metadata_store/db/store.py index b8addf25816..54ce0c21713 100644 --- a/src/tribler/core/components/metadata_store/db/store.py +++ b/src/tribler/core/components/metadata_store/db/store.py @@ -690,9 +690,9 @@ def get_entries_query( t = time() - POPULAR_TORRENTS_FRESHNESS_PERIOD health_list = list( select( - health - for health in self.TorrentState - if health.last_check >= t and (health.seeders > 0 or health.leechers > 0) + health for health in self.TorrentState + if health.has_data == 1 # The condition had to be written this way for the partial index to work + and health.last_check >= t and (health.seeders > 0 or health.leechers > 0) ).order_by( lambda health: (desc(health.seeders), desc(health.leechers), desc(health.last_check)) )[:POPULAR_TORRENTS_COUNT] @@ -748,7 +748,8 @@ def get_entries_query( ) if health_checked_after is not None: - pony_query = pony_query.where(lambda g: g.health.last_check >= health_checked_after) + pony_query = pony_query.where(lambda g: g.health.has_data == 1 # Has to be written this way for index + and g.health.last_check >= health_checked_after) # Sort the query pony_query = pony_query.sort_by("desc(g.rowid)" if sort_desc else "g.rowid") diff --git a/src/tribler/core/components/torrent_checker/torrent_checker/tests/test_torrentchecker.py b/src/tribler/core/components/torrent_checker/torrent_checker/tests/test_torrentchecker.py index a72837e5845..f596489a69b 100644 --- a/src/tribler/core/components/torrent_checker/torrent_checker/tests/test_torrentchecker.py +++ b/src/tribler/core/components/torrent_checker/torrent_checker/tests/test_torrentchecker.py @@ -173,7 +173,7 @@ async def test_check_random_tracker_not_alive(torrent_checker): async def test_task_select_tracker(torrent_checker): with db_session: tracker = torrent_checker.mds.TrackerState(url="http://localhost/tracker") - torrent_checker.mds.TorrentState(infohash=b'a' * 20, seeders=5, leechers=10, trackers={tracker}) + torrent_checker.mds.TorrentState(infohash=b'a' * 20, seeders=5, leechers=10, trackers={tracker}, last_check=1) controlled_session = HttpTrackerSession("127.0.0.1", ("localhost", 8475), "/announce", 5, None) controlled_session.connect_to_tracker = lambda: succeed(None) diff --git a/src/tribler/core/components/torrent_checker/torrent_checker/torrent_checker.py b/src/tribler/core/components/torrent_checker/torrent_checker/torrent_checker.py index 0125c2c08a2..15a8f01c7de 100644 --- a/src/tribler/core/components/torrent_checker/torrent_checker/torrent_checker.py +++ b/src/tribler/core/components/torrent_checker/torrent_checker/torrent_checker.py @@ -124,7 +124,9 @@ async def check_random_tracker(self): url = tracker.url with db_session: dynamic_interval = TORRENT_CHECK_RETRY_INTERVAL * (2 ** tracker.failures) - torrents = select(ts for ts in tracker.torrents if ts.last_check + dynamic_interval < int(time.time())) + torrents = select(ts for ts in tracker.torrents + if ts.has_data == 1 # The condition had to be written this way for the index to work + and ts.last_check + dynamic_interval < int(time.time())) infohashes = [t.infohash for t in torrents[:MAX_TORRENTS_CHECKED_PER_SESSION]] if len(infohashes) == 0: @@ -198,8 +200,8 @@ def load_torrents_checked_from_db(self) -> Dict[bytes, HealthInfo]: now = int(time.time()) last_fresh_time = now - HEALTH_FRESHNESS_SECONDS checked_torrents = list(self.mds.TorrentState - .select(lambda g: g.has_data and g.self_checked - and between(g.last_check, last_fresh_time, now)) + .select(lambda g: g.has_data == 1 # Had to be written this way for index to work + and g.self_checked and between(g.last_check, last_fresh_time, now)) .order_by(lambda g: (desc(g.seeders), g.last_check)) .limit(TORRENTS_CHECKED_RETURN_SIZE)) @@ -223,11 +225,15 @@ def torrents_to_check(self): By old torrents, we refer to those checked quite farther in the past, sorted by the last_check value. """ last_fresh_time = time.time() - HEALTH_FRESHNESS_SECONDS - popular_torrents = list(self.mds.TorrentState.select(lambda g: g.last_check < last_fresh_time). - order_by(lambda g: (desc(g.seeders), g.last_check)).limit(TORRENT_SELECTION_POOL_SIZE)) + popular_torrents = list(self.mds.TorrentState.select( + lambda g: g.has_data == 1 # The condition had to be written this way for the partial index to work + and g.last_check < last_fresh_time + ).order_by(lambda g: (desc(g.seeders), g.last_check)).limit(TORRENT_SELECTION_POOL_SIZE)) - old_torrents = list(self.mds.TorrentState.select(lambda g: g.last_check < last_fresh_time). - order_by(lambda g: (g.last_check, desc(g.seeders))).limit(TORRENT_SELECTION_POOL_SIZE)) + old_torrents = list(self.mds.TorrentState.select( + lambda g: g.has_data == 1 # The condition had to be written this way for the partial index to work + and g.last_check < last_fresh_time + ).order_by(lambda g: (g.last_check, desc(g.seeders))).limit(TORRENT_SELECTION_POOL_SIZE)) selected_torrents = popular_torrents + old_torrents selected_torrents = random.sample(selected_torrents, min(TORRENT_SELECTION_POOL_SIZE, len(selected_torrents))) @@ -254,6 +260,7 @@ def torrents_to_check_in_user_channel(self): channel_torrents = list(self.mds.TorrentMetadata.select( lambda g: g.public_key == self.mds.my_public_key_bin and g.metadata_type == REGULAR_TORRENT + and g.health.has_data == 1 # The condition had to be written this way for the index to work and g.health.last_check < last_fresh_time) .order_by(lambda g: g.health.last_check) .limit(USER_CHANNEL_TORRENT_SELECTION_POOL_SIZE))