From c82fbdeffaa26bf099973c918a68a88e9f0d0d29 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Fri, 7 Jun 2024 19:29:17 -0700 Subject: [PATCH] chore: randomize the selection of nodes to balance the load --- housewatch/clickhouse/backups.py | 4 ++-- housewatch/clickhouse/clusters.py | 5 ++++- housewatch/settings/__init__.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/housewatch/clickhouse/backups.py b/housewatch/clickhouse/backups.py index 54ff9d7..3c0a181 100644 --- a/housewatch/clickhouse/backups.py +++ b/housewatch/clickhouse/backups.py @@ -30,8 +30,8 @@ def execute_backup( ): """ This function will execute a backup on each shard in a cluster - This is very similar to run_query_on_shards but it has very specific things for backups - specifically around base_backup settings + This is very similar to run_query_on_shards but it has very specific params + for backups - specifically around base_backup settings """ nodes = get_node_per_shard(cluster) responses = [] diff --git a/housewatch/clickhouse/clusters.py b/housewatch/clickhouse/clusters.py index ec12a7a..7ea8065 100644 --- a/housewatch/clickhouse/clusters.py +++ b/housewatch/clickhouse/clusters.py @@ -1,3 +1,4 @@ +import random from collections import defaultdict from housewatch.clickhouse.client import run_query @@ -38,11 +39,13 @@ def get_node_per_shard(cluster): preferred = PreferredReplica.objects.filter(cluster=cluster).values_list("replica", flat=True) for shard, n in shards.items(): preferred_replica_found = False + # shuffle the nodes so we don't always pick the first preferred one + random.shuffle(n) for node in n: if node["host_name"] in preferred: nodes.append((shard, node)) preferred_replica_found = True break if not preferred_replica_found: - nodes.append((shard, n[0])) + nodes.append((shard, random.choice(n))) return nodes diff --git a/housewatch/settings/__init__.py b/housewatch/settings/__init__.py index 54f05fe..8d2a3a3 100644 --- a/housewatch/settings/__init__.py +++ b/housewatch/settings/__init__.py @@ -26,7 +26,7 @@ sentry_sdk.init( - dsn="https://6a05afd8bf4e2d54c81833ca1ff98cca@o607503.ingest.sentry.io/4505874503237633", + dsn="https://8874d21e05d62df688505df70c9f053d@o1015702.ingest.us.sentry.io/4507393944846336", integrations=[DjangoIntegration()], # If you wish to associate users to errors (assuming you are using # django.contrib.auth) you may enable sending PII data.