-
Notifications
You must be signed in to change notification settings - Fork 280
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Metrics Monitoring: A minimal viable product #348
Changes from 7 commits
6d5f9e2
439d9f8
3166bc0
8fbba2f
7947a5f
79f3145
e1c7227
fd3b078
bc55071
5efc602
d86d79a
3ffd51b
21b3926
ceb6065
0922c3d
ebf2df6
ccff8ae
6756b18
92d2c01
4fc5c10
d84be8d
9e955b3
457f792
808935b
5816ca7
26e0024
a93606d
3dd8298
56425ae
e2d8f46
086e305
87a67e2
b89cbae
aa4b7c8
dd9d2e5
b218000
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
import yaml | ||
import requests | ||
import random | ||
import os | ||
|
||
|
||
def ensure_clipper_tmp():
    """
    Make sure the /tmp/clipper directory exists. If not, create it.

    An ``OSError`` raised by ``os.makedirs`` is ignored only when the
    directory is actually present afterwards, which is the py2-compatible
    equivalent of ``os.makedirs(path, exist_ok=True)``. Any other failure is
    re-raised instead of being silently swallowed.

    :return: None
    :raises OSError: If the directory does not exist and cannot be created.
    """
    clipper_tmp_dir = '/tmp/clipper'
    try:
        os.makedirs(clipper_tmp_dir)
    except OSError:
        # "Already exists" is the only benign failure. A permission error or
        # a regular file sitting at that path must surface to the caller.
        if not os.path.isdir(clipper_tmp_dir):
            raise
|
||
|
||
def get_prometheus_base_config():
    """
    Build the skeleton of a prometheus configuration.

    The result has a ``global`` section with 5 second evaluation and scrape
    intervals, and an empty ``scrape_configs`` list for callers to append
    jobs to. A fresh dictionary is created on every call.

    :return: dictionary
    """
    return {
        'global': {
            'evaluation_interval': '5s',
            'scrape_interval': '5s',
        },
        'scrape_configs': [],
    }
|
||
|
||
def run_query_frontend_metric_image(name, docker_client, query_name,
                                    common_labels, extra_container_kwargs):
    """
    Start a ``clipper/frontend-exporter`` container through docker_client.

    :param name: Name given to the new container, need to be unique.
    :param docker_client: The docker_client object.
    :param query_name: Name of the query frontend, forwarded to the image
        as its ``--query_frontend_name`` argument.
    :param common_labels: Labels to attach; a copy is passed on, so the
        caller's dictionary is not shared with the container call.
    :param extra_container_kwargs: Extra keyword arguments forwarded
        verbatim to ``containers.run``.
    :return: None
    """
    exporter_cmd = "--query_frontend_name {}".format(query_name)
    exporter_labels = common_labels.copy()

    launch = docker_client.containers.run
    launch(
        "clipper/frontend-exporter",
        exporter_cmd,
        name=name,
        labels=exporter_labels,
        **extra_container_kwargs)
|
||
|
||
def setup_metric_config(query_frontend_metric_name,
                        CLIPPER_INTERNAL_METRIC_PORT):
    """
    Create /tmp/clipper/prometheus.yml once the frontend-metric is set up.

    The file holds the base configuration plus a single scrape job named
    ``query`` whose only target is ``<name>:<port>``.

    :param query_frontend_metric_name: Corresponding image name, used as the
        host part of the scrape target.
    :param CLIPPER_INTERNAL_METRIC_PORT: Port part of the scrape target.
    :return: None
    """
    ensure_clipper_tmp()

    with open('/tmp/clipper/prometheus.yml', 'w') as config_file:
        config = get_prometheus_base_config()

        scrape_target = '{name}:{port}'.format(
            name=query_frontend_metric_name,
            port=CLIPPER_INTERNAL_METRIC_PORT)
        config['scrape_configs'].append({
            'job_name': 'query',
            'static_configs': [{
                'targets': [scrape_target]
            }]
        })

        yaml.dump(config, config_file)
|
||
|
||
def run_metric_image(docker_client, common_labels, extra_container_kwargs):
    """
    Start a ``prom/prometheus`` container through docker_client.

    The container is named ``metric_frontend-<random int>``, publishes port
    9090 and mounts /tmp/clipper/prometheus.yml read-only as its
    configuration file.

    :param docker_client: The docker client object
    :param common_labels: Labels to attach; a copy is passed on.
    :param extra_container_kwargs: Extra keyword arguments forwarded
        verbatim to ``containers.run``.
    :return: None
    """
    prometheus_args = [
        "--config.file=/etc/prometheus/prometheus.yml",
        "--storage.tsdb.path=/prometheus",
        "--web.console.libraries=/etc/prometheus/console_libraries",
        "--web.console.templates=/etc/prometheus/consoles",
        "--web.enable-lifecycle"
    ]
    prometheus_labels = common_labels.copy()

    # Host-side config file, bind-mounted read-only into the container.
    config_mount = {
        '/tmp/clipper/prometheus.yml': {
            'bind': '/etc/prometheus/prometheus.yml',
            'mode': 'ro'
        }
    }
    published_ports = {'9090/tcp': 9090}

    docker_client.containers.run(
        "prom/prometheus",
        prometheus_args,
        name="metric_frontend-{}".format(random.randint(0, 100000)),
        ports=published_ports,
        volumes=config_mount,
        labels=prometheus_labels,
        **extra_container_kwargs)
|
||
|
||
def update_metric_config(model_container_name, CLIPPER_INTERNAL_METRIC_PORT):
    """
    Update the prometheus.yml configuration file.

    Reads /tmp/clipper/prometheus.yml, appends a scrape job for the given
    model container, writes the file back and asks the prometheus server on
    localhost:9090 to reload its configuration.

    :param model_container_name: New model container_name, need to be unique.
        It is used both as the job name and as the host of the scrape target.
    :param CLIPPER_INTERNAL_METRIC_PORT: Port part of the scrape target.
    :return: None
    """
    config_path = '/tmp/clipper/prometheus.yml'

    with open(config_path, 'r') as f:
        # safe_load only builds plain Python objects; a bare yaml.load() can
        # construct arbitrary objects and is rejected by newer PyYAML
        # releases when no Loader is given. An empty file parses to None.
        conf = yaml.safe_load(f) or {}

    # Tolerate a config whose scrape_configs key is missing or null.
    if not conf.get('scrape_configs'):
        conf['scrape_configs'] = []

    new_job_dict = {
        'job_name':
        '{}'.format(model_container_name),
        'static_configs': [{
            'targets': [
                '{name}:{port}'.format(
                    name=model_container_name,
                    port=CLIPPER_INTERNAL_METRIC_PORT)
            ]
        }]
    }
    conf['scrape_configs'].append(new_job_dict)

    with open(config_path, 'w') as f:
        yaml.dump(conf, f)

    # NOTE(review): the reload response is not inspected and no timeout is
    # set, so a failed or stalled reload goes unnoticed here.
    requests.post('http://localhost:9090/-/reload')
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ docker==2.5.1 | |
kubernetes==3.0.0 | ||
six==1.10.0 | ||
mock | ||
prometheus_client |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,7 @@ | |
'pyyaml', | ||
'docker', | ||
'kubernetes', | ||
'prometheus_client', | ||
'six', | ||
], | ||
extras_require={ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import requests | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The containers directory is for stuff related to model containers. Can you create a separate monitoring directory? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you also put a short README in the monitoring directory that provides instructions on how to access the Prometheus server once it is up? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got it! This is a much better idea. |
||
from flatten_json import flatten | ||
import itertools | ||
import time | ||
from prometheus_client import start_http_server | ||
from prometheus_client.core import GaugeMetricFamily, REGISTRY | ||
import argparse | ||
|
||
# Command-line interface: the exporter has to be told which query frontend
# container it should poll.
parser = argparse.ArgumentParser(
    description='Spin up a node exporter for query_frontend.')
parser.add_argument(
    '--query_frontend_name',
    metavar='str',
    type=str,
    required=True,
    help='The name of docker container in clipper_network')
# NOTE: the arguments are parsed at import time, so loading this module
# without --query_frontend_name on the command line makes argparse exit.
args = parser.parse_args()

query_frontend_id = args.query_frontend_name

# URL requested by load_metric(); the container name given on the command
# line is substituted as the host.
ADDRESS = 'http://{}:1337/metrics'.format(query_frontend_id)
|
||
|
||
def load_metric(timeout=5):
    """
    Fetch the metrics document served at ADDRESS and decode it as JSON.

    :param timeout: Seconds to wait for the HTTP request before giving up.
        Without a timeout a stalled frontend would block the caller forever.
    :return: The decoded JSON body of the response.
    :raises requests.RequestException: If the request fails, times out or
        the server answers with an error status.
    :raises ValueError: If the response body is not valid JSON.
    """
    res = requests.get(ADDRESS, timeout=timeout)
    # Fail with an explicit HTTP error rather than trying to parse an error
    # page as metrics.
    res.raise_for_status()
    return res.json()
|
||
|
||
def multi_dict_unpacking(lst):
    """
    Receive an iterable of dictionaries, join them into one big dictionary.

    When the same key appears in several dictionaries, the value from the
    later one wins. The input dictionaries are not modified.

    :param lst: Iterable of dictionaries (may be empty).
    :return: A new dictionary holding every key/value pair.
    """
    result = {}
    for d in lst:
        # Merge in place; rebuilding the whole dictionary on every iteration
        # would copy all previously merged items again each time.
        result.update(d)
    return result
|
||
|
||
def parse_metric(metrics):
    """
    Collapse the metrics document into one flat name -> value dictionary.

    Every value of ``metrics`` is treated as an iterable of dictionaries.
    The outer keys are dropped, each inner dictionary is flattened with
    ``flatten`` and the results are merged, later entries overriding
    earlier ones on a key clash.

    :param metrics: Dictionary whose values are iterables of dictionaries
        (presumably metric type -> list of metric entries; confirm against
        the frontend's /metrics output).
    :return: A single flat dictionary.
    """
    # Stream the entries instead of materialising intermediate lists; the
    # former itertools.chain() around a single list added nothing.
    entries = itertools.chain.from_iterable(metrics.values())
    return multi_dict_unpacking(flatten(entry) for entry in entries)
|
||
|
||
class ClipperCollector(object):
    """
    Custom collector that re-exports the query frontend metrics as gauges.

    On every ``collect`` call the metrics are fetched with ``load_metric``,
    flattened with ``parse_metric`` and each numeric entry is yielded as a
    ``GaugeMetricFamily``. Entries that are not numeric are skipped.
    """

    def __init__(self):
        pass

    @staticmethod
    def _to_number(val):
        """
        Convert a metric value to an int or a float.

        :param val: A number, or a string holding one.
        :return: ``val`` itself if it already is a number, otherwise the
            parsed int, falling back to float for anything int() rejects.
        :raises ValueError: If a string does not hold a number.
        :raises TypeError: If the value cannot be converted at all.
        """
        if isinstance(val, (int, float)):
            return val
        try:
            return int(val)
        except ValueError:
            # Decimal or exponent notation such as "0.5" or "1e3". The old
            # `'.' or 'e' in val` test was always true, so it never
            # distinguished these from plain integers.
            return float(val)

    def collect(self):
        """
        Yield one gauge per numeric metric of the query frontend.

        ':' and '-' in metric names are replaced by '_'.

        :return: Generator of GaugeMetricFamily objects.
        """
        metrics = parse_metric(load_metric())

        for name, val in metrics.items():
            try:
                number = self._to_number(val)
                gauge_name = name.replace(':', '_').replace('-', '_')
                gauge = GaugeMetricFamily(gauge_name, 'help', value=number)
            except (TypeError, ValueError):
                # Non-numeric entries and anything the gauge constructor
                # rejects with one of these errors are skipped, so a single
                # bad entry does not abort the whole scrape.
                continue
            yield gauge
|
||
|
||
# Script entry point: register the custom collector, expose the registry
# over HTTP and keep the process alive.
if __name__ == '__main__':
    REGISTRY.register(ClipperCollector())
    # Metrics are served on port 1390.
    # NOTE(review): presumably this must match the port prometheus is
    # configured to scrape - confirm against the caller.
    start_http_server(1390)
    # Nothing else to do in the main thread; sleep forever so the process
    # does not exit.
    while True:
        time.sleep(1)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's use the same random integer as the
query_frontend_name
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.