test_profiling_router.py
# #### What this tests ####
# # This profiles a router call to find where calls are taking the most time.
# import sys, os, time, logging
# import traceback, asyncio, uuid
# import pytest
# import cProfile
# from pstats import Stats
# sys.path.insert(
#     0, os.path.abspath("../..")
# )  # Adds the parent directory to the system path
# import litellm
# from litellm import Router
# from concurrent.futures import ThreadPoolExecutor
# from dotenv import load_dotenv
# from aiodebug import log_slow_callbacks # Import the aiodebug utility for logging slow callbacks
# load_dotenv()
# logging.basicConfig(
#     level=logging.DEBUG,
#     format='%(asctime)s %(levelname)s: %(message)s',
#     datefmt='%I:%M:%S %p',
#     filename='aiologs.log',  # Name of the log file where logs will be written
#     filemode='w',  # 'w' to overwrite the log file on each run, use 'a' to append
# )
# model_list = [{
#     "model_name": "azure-model",
#     "litellm_params": {
#         "model": "azure/gpt-turbo",
#         "api_key": "os.environ/AZURE_FRANCE_API_KEY",
#         "api_base": "https://openai-france-1234.openai.azure.com",
#         "rpm": 1440,
#     },
# }, {
#     "model_name": "azure-model",
#     "litellm_params": {
#         "model": "azure/gpt-35-turbo",
#         "api_key": "os.environ/AZURE_EUROPE_API_KEY",
#         "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
#         "rpm": 6,
#     },
# }, {
#     "model_name": "azure-model",
#     "litellm_params": {
#         "model": "azure/gpt-35-turbo",
#         "api_key": "os.environ/AZURE_CANADA_API_KEY",
#         "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
#         "rpm": 6,
#     },
# }]
# router = Router(model_list=model_list, set_verbose=False, num_retries=3)
# async def router_completion():
#     try:
#         messages = [{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}]
#         response = await router.acompletion(model="azure-model", messages=messages)
#         return response
#     except asyncio.exceptions.CancelledError:
#         print("Task was cancelled")
#         return None
#     except Exception:
#         return None
# async def loadtest_fn(n=1000):
#     start = time.time()
#     tasks = [router_completion() for _ in range(n)]
#     chat_completions = await asyncio.gather(*tasks)
#     successful_completions = [c for c in chat_completions if c is not None]
#     print(n, time.time() - start, len(successful_completions))
# # loop = asyncio.get_event_loop()
# # loop.set_debug(True)
# # log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds
# # # Execute the load testing function within the asyncio event loop
# # loop.run_until_complete(loadtest_fn())
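# # The imports above pull in cProfile and Stats, but the profiling run itself
# # is not shown in this file. A minimal sketch (not from the original script) of
# # how they could wrap the load test; the filename "router_profile.prof" is an
# # assumption:
# # profiler = cProfile.Profile()
# # profiler.enable()  # start collecting call stats
# # asyncio.run(loadtest_fn())  # profile the full async load test
# # profiler.disable()
# # profiler.dump_stats("router_profile.prof")  # save stats for later inspection
# # Stats(profiler).sort_stats("cumulative").print_stats(25)  # top 25 by cumulative time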
# ### SUSTAINED LOAD TESTS ###
# async def make_requests(n):
#     tasks = [router_completion() for _ in range(n)]
#     print(f"num tasks: {len(tasks)}")
#     chat_completions = await asyncio.gather(*tasks)
#     successful_completions = [c for c in chat_completions if c is not None]
#     print(f"successful_completions: {len(successful_completions)}")
#     return successful_completions
# async def main():
#     request_limit = 1000
#     batches = 2  # batches of 1k requests
#     start = time.time()
#     tasks = []  # list to hold all tasks
#     async def request_loop():
#         nonlocal tasks
#         for _ in range(batches):
#             # Make 1,000 requests
#             task = asyncio.create_task(make_requests(request_limit))
#             tasks.append(task)
#             # Introduce a delay to achieve 1,000 requests per second
#             await asyncio.sleep(1)
#     await request_loop()
#     results = await asyncio.gather(*tasks)
#     total_successful_requests = sum(len(res) for res in results)
#     print(request_limit * batches, time.time() - start, total_successful_requests)
# asyncio.run(main())
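# # Alternatively (a usage sketch, not part of the original test), the script
# # could be profiled end-to-end from the command line once uncommented:
# #   python -m cProfile -o router_profile.prof test_profiling_router.py
# #   python -m pstats router_profile.prof
# # "router_profile.prof" is an assumed filename; in the pstats browser, the
# # `sort cumulative` and `stats 25` commands then surface the hotspots.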