Skip to content

Commit

Permalink
[test] Add rolling update test (milvus-io#22144)
Browse files Browse the repository at this point in the history
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
Co-authored-by: Sheldon <chuanfeng.liu@zilliz.com>
  • Loading branch information
zhuwenxing and locustbaby committed Feb 14, 2023
1 parent e5a6d90 commit 33de788
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 8 deletions.
6 changes: 3 additions & 3 deletions tests/python_client/chaos/chaos_commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,17 @@ def reconnect(connections, alias='default', timeout=360):
return connections.connect(alias)


def assert_statistic(checkers, expectations=None, succ_rate_threshold=0.95, fail_rate_threshold=0.49):
    """Assert that every health checker meets its expected success/failure statistics.

    Args:
        checkers: mapping of checker key -> checker object exposing
            ``succ_rate()``, ``total()`` and an ``average_time`` attribute.
        expectations: optional mapping of checker key -> expected outcome.
            A value equal to ``constants.FAIL`` means that checker is expected
            to fail; any other value (or a missing key) means it is expected
            to succeed.
        succ_rate_threshold: minimum success rate for an expected-success checker.
        fail_rate_threshold: maximum success rate for an expected-failure checker.
    """
    # Use None as the default instead of {} to avoid the shared
    # mutable-default-argument pitfall; behavior is unchanged for callers.
    if expectations is None:
        expectations = {}
    for k in checkers.keys():
        # expect succ if no expectations
        succ_rate = checkers[k].succ_rate()
        total = checkers[k].total()
        average_time = checkers[k].average_time
        if expectations.get(k, '') == constants.FAIL:
            log.info(f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
            expect(succ_rate < fail_rate_threshold or total < 2,
                   f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
        else:
            log.info(f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
            expect(succ_rate > succ_rate_threshold and total > 2,
                   f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
13 changes: 10 additions & 3 deletions tests/python_client/chaos/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ def trace(fmt=DEFAULT_FMT, prefix='chaos-test', flag=True):
def decorate(func):
@functools.wraps(func)
def inner_wrapper(self, *args, **kwargs):
start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
t0 = time.perf_counter()
res, result = func(self, *args, **kwargs)
elapsed = time.perf_counter() - t0
end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
operation_name = func.__name__
if flag:
collection_name = self.c_wrap.name
operation_name = func.__name__
log_str = f"[{prefix}]" + fmt.format(**locals())
# TODO: add report function in this place, like uploading to influxdb
# it is better a async way to do this, in case of blocking the request processing
Expand All @@ -56,8 +56,12 @@ def inner_wrapper(self, *args, **kwargs):
self.average_time = (
elapsed + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
if len(self.fail_records) > 0 and self.fail_records[-1][0] == "failure" and \
self._succ + self._fail == self.fail_records[-1][1] + 1:
self.fail_records.append(("success", self._succ + self._fail, start_time))
else:
self._fail += 1
self.fail_records.append(("failure", self._succ + self._fail, start_time))
return res, result
return inner_wrapper
return decorate
Expand Down Expand Up @@ -91,6 +95,7 @@ class Checker:
def __init__(self, collection_name=None, shards_num=2, dim=ct.default_dim):
self._succ = 0
self._fail = 0
self.fail_records = []
self._keep_running = True
self.rsp_times = []
self.average_time = 0
Expand Down Expand Up @@ -126,6 +131,8 @@ def check_result(self):
checkers_result = f"{checker_name}, succ_rate: {succ_rate:.2f}, total: {total:03d}, average_time: {average_time:.4f}, max_time: {max_time:.4f}, min_time: {min_time:.4f}"
log.info(checkers_result)
log.info(f"{checker_name} rsp times: {self.rsp_times}")
if len(self.fail_records) > 0:
log.info(f"{checker_name} failed at {self.fail_records}")
return checkers_result

def terminate(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,6 @@ def test_operations(self, request_duration, is_check):
for k,v in self.health_checkers.items():
v.check_result()
if is_check:
assert_statistic(self.health_checkers)
assert_statistic(self.health_checkers, succ_rate_threshold=0.98)
assert_expectations()
log.info("*********************Chaos Test Completed**********************")
53 changes: 53 additions & 0 deletions tests/python_client/deploy/scripts/breakdown_rolling_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import psutil
import time
from loguru import logger


def is_rolling_update(proc_info):
    """Return True when a process info dict's cmdline references rollingUpdate.sh.

    ``proc_info`` is a psutil ``p.info`` dict containing at least a
    'cmdline' key. psutil may report cmdline as None instead of a list
    (e.g. access denied, zombie process), and individual args could be
    falsy, so both cases are guarded before the substring test.
    """
    cmdline_list = proc_info.get("cmdline", [])
    if not isinstance(cmdline_list, list):
        return False
    # Substring match per argument so wrappers such as
    # "bash /path/rollingUpdate.sh" are also caught.
    return any(arg and "rollingUpdate.sh" in arg for arg in cmdline_list)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='config for rolling update process')
    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
    args = parser.parse_args()
    wait_time = args.wait_time
    logger.info("start to watch rolling update process")
    start_time = time.time()
    target = None
    process_list = []
    # Poll the process table (for at most 360s) until the rolling update
    # script shows up; keep the last snapshot for diagnostics on timeout.
    while target is None and time.time() - start_time < 360:
        process_list = [p.info for p in psutil.process_iter(attrs=['pid', 'name', 'cmdline'])]
        for process in process_list:
            logger.debug(process)
        logger.debug("##" * 30)
        for process in process_list:
            if is_rolling_update(process):
                logger.info(f"rolling update process: {process} started")
                target = process
                break
        if target is None:
            time.sleep(0.5)
    elapsed = time.time() - start_time
    if target is None:
        logger.info(f"rolling update process not found, wait for {elapsed} seconds")
        logger.info("all process info")
        for process in process_list:
            logger.info(process)
    else:
        logger.info(f"rolling update process {target} found, wait for {elapsed} seconds")
        logger.info(f"wait {wait_time}s to kill rolling update process")
        time.sleep(wait_time)
        logger.info("start to kill rolling update process")
        try:
            p = psutil.Process(target["pid"])
            p.terminate()
            logger.info(f"rolling update process: {target} killed")
        except Exception as e:
            logger.error(f"rolling update process: {target} kill failed, {e}")



5 changes: 4 additions & 1 deletion tests/python_client/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ pytest-random-order
python-benedict==0.24.3
timeout-decorator==0.5.0

# for bulk load test
# for bulk insert test
minio==7.1.5

# for benchmark
h5py==3.7.0

# for log
loguru==0.6.0

# util
psutil==5.8.0
37 changes: 37 additions & 0 deletions tests/scripts/breakdown_rolling_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import psutil
import time
from loguru import logger


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='config for rolling update process')
    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
    args = parser.parse_args()
    wait_time = args.wait_time
    logger.info("start to watch rolling update process")
    start_time = time.time()
    target = None
    # Poll the process table (for at most 360s) until a process running
    # rollingUpdate.sh appears.
    while target is None and time.time() - start_time < 360:
        process_list = [p.info for p in psutil.process_iter(attrs=['pid', 'name', 'cmdline'])]
        for process in process_list:
            cmdline_list = process.get("cmdline", [])
            # psutil may report None instead of a list (e.g. access denied);
            # match per-argument substring so "bash /path/rollingUpdate.sh"
            # is caught too (exact membership used to miss it).
            if isinstance(cmdline_list, list) and \
                    any(arg and "rollingUpdate.sh" in arg for arg in cmdline_list):
                logger.info(f"rolling update process: {process} started")
                target = process
                break
        if target is None:
            time.sleep(0.5)
    if target is None:
        # BUG FIX: the previous version fell through to the kill logic even on
        # timeout and terminated whatever process happened to be last in the
        # snapshot; now we only kill the process we actually found.
        logger.info(f"rolling update process not found, wait for {time.time() - start_time} seconds")
    else:
        logger.info(f"wait {wait_time}s to kill rolling update process")
        time.sleep(wait_time)
        logger.info("start to kill rolling update process")
        try:
            p = psutil.Process(target["pid"])
            p.terminate()
            logger.info(f"rolling update process: {target} killed")
        except Exception as e:
            logger.error(f"rolling update process: {target} kill failed, {e}")

0 comments on commit 33de788

Please sign in to comment.