#!/usr/bin/env python3
# Lint as: python3
"""Monitors wpt.fyi runs and open WPT pull requests.

Fetches recent stable and experimental runs from wpt.fyi and open pull
requests from the web-platform-tests/wpt repository, then reports the results
as custom Cloud Monitoring metrics.
"""
import json
import logging
import sys
import time
from collections import defaultdict
from datetime import datetime
from datetime import timezone

import flask
import requests
from dateutil import parser as date_parser
from github import Github
from google.api_core import exceptions as g_exceptions
from google.cloud import firestore
from google.cloud import monitoring_v3

logging.basicConfig(level=logging.DEBUG)
app = flask.Flask(__name__)

GCLOUD_PROJECT_ID = "wptmon"
DESCRIPTOR_PROJECT_NAME = "projects/wptmon"
RUNS_REQ_TIMEOUT_SECONDS = 5
RUNS_MAX_AGE_SECONDS = 24 * 60 * 60 # 1 day
OPEN_PRS_MAX_AGE_SECONDS = 30 * 24 * 60 * 60 # 30 days
ERROR_RESULT = -1
RECENT_STABLE_RUNS_FETCH_URL = "https://wpt.fyi/api/runs?labels=master,stable&products=chrome,firefox,safari&max-count=1"
RECENT_EXPERIMENTAL_RUNS_FETCH_URL = "https://wpt.fyi/api/runs?labels=master,experimental&products=chrome,edge,firefox,safari,webkitgtk&max-count=1"
METRIC_TYPE_RECENT_STABLE_RUNS = "custom.googleapis.com/wpt.fyi/recent_stable_runs"
METRIC_TYPE_RECENT_EXPERIMENTAL_RUNS = "custom.googleapis.com/wpt.fyi/recent_experimental_runs"
METRIC_TYPE_OPEN_PRS = "custom.googleapis.com/wpt-github/open_prs"
METRIC_TYPE_OPEN_PR_STATUSES = "custom.googleapis.com/wpt-github/open_pr_status"
METRIC_TYPE_OPEN_PR_NO_CHECKS = "custom.googleapis.com/wpt-github/open_pr_no_checks"


class MonitorState:
"""This is a state object for holding artifacts during metrics gathering.
Many of the APIs we call to get metrics data require HTTP calls or
heavy-weight processing, so it's useful to be able to save intermediate
data from processing one metric to later use for processing another metric.
"""

  def __init__(self):
# The github.PaginatedList.PaginatedList object containing the set of open
# pull requests from github. Note that accessing this issues more HTTP
# requests.
self.github_open_prs = None


def _get_github_token():
  """Returns the GitHub API token stored in Firestore, or None if missing."""
  db = firestore.Client()
  doc_ref = db.collection("config").document("github").get()
  if doc_ref.exists:
    return doc_ref.to_dict()["gh_token"]
  return None
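
# Illustrative sketch of the Firestore layout _get_github_token() expects; the
# collection, document, and field names come from the code above, while the
# token value is hypothetical:
#
#   config (collection)
#     github (document)
#       gh_token: "<personal access token>"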


def _create_metric_recent_runs(metric_type, description):
"""Creates a recent runs metric (either stable or experimental).
It's OK to repeatedly create the same metric as long as none of the parameters
change. In such a case, re-creation is a no-op. If anything does change then
re-creation is an error (because we use the same name), and we have to delete
the old metric before creating it again.
:arg type: string descriptor type for the metric
:arg description: string human-readable description of what the metric is
"""
client = monitoring_v3.MetricServiceClient()
descriptor = monitoring_v3.types.MetricDescriptor()
  descriptor.type = metric_type
descriptor.value_type = monitoring_v3.enums.MetricDescriptor.ValueType.INT64
descriptor.metric_kind = monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE
descriptor.description = description
client.create_metric_descriptor(DESCRIPTOR_PROJECT_NAME, descriptor)
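
# A hedged sketch (not used by this module) of the recovery path described in
# the docstring above: if a descriptor's parameters change, the old descriptor
# must be deleted before it can be created again under the same name.
#
#   client = monitoring_v3.MetricServiceClient()
#   client.delete_metric_descriptor(
#       DESCRIPTOR_PROJECT_NAME + "/metricDescriptors/"
#       + METRIC_TYPE_RECENT_STABLE_RUNS)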


def _write_metric_recent_runs(metric_type, browser_name, num_recent_runs):
  """Updates a 'recent runs' metric (stable or experimental).

  :arg metric_type: string descriptor type for the metric
  :arg browser_name: string name of the browser, added as a label for the metric
  :arg num_recent_runs: int the number of recent runs
  """
client = monitoring_v3.MetricServiceClient()
series = monitoring_v3.types.TimeSeries()
  series.metric.type = metric_type
series.resource.type = "generic_task"
series.resource.labels["namespace"] = "wpt"
series.resource.labels["location"] = "us-east1"
series.resource.labels["job"] = "wpt.fyi"
# We stick the browser name inside the task ID. Unfortunately we can't modify
# the set of labels that is available on the MonitoredResource, nor can we
# define a custom one.
series.resource.labels["task_id"] = browser_name
point = series.points.add()
point.value.int64_value = num_recent_runs
point.interval.end_time.seconds = int(time.time())
client.create_time_series(DESCRIPTOR_PROJECT_NAME, [series])
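
# Illustrative example for _write_metric_recent_runs() (hypothetical values):
# a point written for Chrome lands on a generic_task resource labeled
#   {"namespace": "wpt", "location": "us-east1", "job": "wpt.fyi",
#    "task_id": "chrome"}
# so per-browser series are distinguished only by task_id.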


def create_and_write_metric_open_prs(num_open_prs):
  """Creates the 'open PRs' metric and writes a single gauge point to it."""
client = monitoring_v3.MetricServiceClient()
descriptor = monitoring_v3.types.MetricDescriptor()
descriptor.type = METRIC_TYPE_OPEN_PRS
descriptor.value_type = monitoring_v3.enums.MetricDescriptor.ValueType.INT64
descriptor.metric_kind = monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE
descriptor.description = "Number of open Github pull requests in the WPT repository"
client.create_metric_descriptor(DESCRIPTOR_PROJECT_NAME, descriptor)
series = monitoring_v3.types.TimeSeries()
series.metric.type = METRIC_TYPE_OPEN_PRS
series.resource.type = "generic_task"
series.resource.labels["namespace"] = "wpt"
series.resource.labels["location"] = "us-east1"
series.resource.labels["job"] = "wpt-github"
series.resource.labels["task_id"] = "wpt-github"
point = series.points.add()
point.value.int64_value = num_open_prs
point.interval.end_time.seconds = int(time.time())
client.create_time_series(DESCRIPTOR_PROJECT_NAME, [series])


def create_and_write_metric_open_pr_no_checks(num_prs_with_no_checks, is_draft):
  """Creates the 'open PRs with no checks' metric and writes a point to it.

  :arg num_prs_with_no_checks: int the number of open PRs that have no checks
  :arg is_draft: bool whether the count covers draft PRs or non-draft PRs
  """
client = monitoring_v3.MetricServiceClient()
descriptor = monitoring_v3.types.MetricDescriptor()
descriptor.type = METRIC_TYPE_OPEN_PR_NO_CHECKS
descriptor.value_type = monitoring_v3.enums.MetricDescriptor.ValueType.INT64
descriptor.metric_kind = monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE
descriptor.description = "Number of open Github pull requests with no checks"
client.create_metric_descriptor(DESCRIPTOR_PROJECT_NAME, descriptor)
series = monitoring_v3.types.TimeSeries()
series.metric.type = METRIC_TYPE_OPEN_PR_NO_CHECKS
series.resource.type = "generic_task"
series.resource.labels["namespace"] = "wpt"
series.resource.labels["location"] = "us-east1"
series.resource.labels["job"] = "wpt-github"
series.resource.labels["task_id"] = "draft_pr" if is_draft else "pr"
point = series.points.add()
point.value.int64_value = num_prs_with_no_checks
point.interval.end_time.seconds = int(time.time())
client.create_time_series(DESCRIPTOR_PROJECT_NAME, [series])


def create_metric_open_pr_statuses():
  """Creates the 'open PR statuses' metric descriptor."""
client = monitoring_v3.MetricServiceClient()
descriptor = monitoring_v3.types.MetricDescriptor()
descriptor.type = METRIC_TYPE_OPEN_PR_STATUSES
descriptor.value_type = monitoring_v3.enums.MetricDescriptor.ValueType.INT64
descriptor.metric_kind = monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE
descriptor.description = "Check statuses for open pull requests in the WPT repository"
client.create_metric_descriptor(DESCRIPTOR_PROJECT_NAME, descriptor)


def write_metric_open_pr_statuses(check_name, check_status, count):
  """Writes a single point to the 'open PR statuses' metric.

  :arg check_name: str the name of the check that ran on the PR
  :arg check_status: str the status of the check (e.g. success, failure, error)
  :arg count: int the number of PRs that had this check with this status
  """
client = monitoring_v3.MetricServiceClient()
series = monitoring_v3.types.TimeSeries()
series.metric.type = METRIC_TYPE_OPEN_PR_STATUSES
series.resource.type = "generic_task"
series.resource.labels["namespace"] = "wpt"
series.resource.labels["location"] = "us-east1"
# We are mis-using labels here because cloud monitoring won't let us define
# our own labels.
series.resource.labels["job"] = check_name
series.resource.labels["task_id"] = check_status
point = series.points.add()
point.value.int64_value = count
point.interval.end_time.seconds = int(time.time())
client.create_time_series(DESCRIPTOR_PROJECT_NAME, [series])
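
# Illustrative call (hypothetical check name and values):
#   write_metric_open_pr_statuses("wpt-chrome-dev-results", "success", 12)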


def create_metric_recent_stable_runs():
"""This creates a metric for tracking recent stable runs."""
_create_metric_recent_runs(METRIC_TYPE_RECENT_STABLE_RUNS,
"Number of recent stable runs on wpt.fyi")


def create_metric_recent_experimental_runs():
"""This creates a metric for tracking recent experimental runs."""
_create_metric_recent_runs(METRIC_TYPE_RECENT_EXPERIMENTAL_RUNS,
"Number of recent experimental runs on wpt.fyi")


def write_metric_recent_stable_runs(browser_name, num_stable_runs):
  """Updates the 'recent stable runs' metric.

  Sets the number of recent stable runs for the specified browser.
  """
_write_metric_recent_runs(METRIC_TYPE_RECENT_STABLE_RUNS, browser_name,
num_stable_runs)


def write_metric_recent_experimental_runs(browser_name, num_experimental_runs):
  """Updates the 'recent experimental runs' metric.

  Sets the number of recent experimental runs for the specified browser.
  """
_write_metric_recent_runs(METRIC_TYPE_RECENT_EXPERIMENTAL_RUNS,
browser_name, num_experimental_runs)


def _get_recent_runs(fetch_url, max_age_seconds):
  """Gets a list of the latest runs and identifies browsers with recent runs.

  :arg fetch_url: string URL from which to fetch the list of runs
  :arg max_age_seconds: int the age, in seconds, of the oldest run that is
    considered recent.
  :return list of strings containing the browser names with a recent run, or
    None if recent runs could not be fetched.
  """
try:
fyi_runs = requests.get(fetch_url, timeout=RUNS_REQ_TIMEOUT_SECONDS)
# This will raise an exception if there was an error code in the response,
# or do nothing if the request was successful.
fyi_runs.raise_for_status()
except requests.exceptions.RequestException as e:
logging.error("Failed to fetch runs, e=%s" % e)
return None
try:
if not fyi_runs.json():
logging.error("Received empty JSON")
return None
  except ValueError:
logging.error("Failed to parse JSON, content=%s" % fyi_runs.content)
return None
# At this point we should have a response in |fyi_runs|, which is a list of
# dicts. Each dict contains info about a single run, one for each browser.
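  # Illustrative shape of one entry (values hypothetical; only the fields
  # used below are shown):
  #   {"browser_name": "chrome", "time_end": "2022-01-01T12:00:00Z"}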
recent_browsers = []
for run in fyi_runs.json():
logging.debug("Processing run %s" % json.dumps(run))
run_end_time = date_parser.parse(run["time_end"])
run_age_seconds = (datetime.now(tz=timezone.utc) - run_end_time).total_seconds()
if run_age_seconds <= max_age_seconds:
recent_browsers.append(run["browser_name"])
logging.info("Finished processing runs, recent browser runs=%s" % recent_browsers)
return recent_browsers


def get_recent_stable_runs():
logging.info("Finding recent stable runs")
recent_browsers = _get_recent_runs(RECENT_STABLE_RUNS_FETCH_URL, RUNS_MAX_AGE_SECONDS)
if recent_browsers is None:
return "Failed to get stable runs"
try:
create_metric_recent_stable_runs()
for browser_name in recent_browsers:
write_metric_recent_stable_runs(browser_name, 1)
except g_exceptions.InvalidArgument as e:
logging.error("Failed to update stable run metrics skipping: %s" % e)
return "Recent stable runs: %s" % recent_browsers


def get_recent_experimental_runs():
logging.info("Finding recent experimental runs")
recent_browsers = _get_recent_runs(RECENT_EXPERIMENTAL_RUNS_FETCH_URL, RUNS_MAX_AGE_SECONDS)
if recent_browsers is None:
return "Failed to get experimental runs"
try:
create_metric_recent_experimental_runs()
for browser_name in recent_browsers:
write_metric_recent_experimental_runs(browser_name, 1)
except g_exceptions.InvalidArgument as e:
logging.error("Failed to update experimental run metrics skipping: %s" % e)
return "Recent experimental runs: %s" % recent_browsers


def get_open_pr_statuses(mon_state):
  """Counts open WPT PRs and the statuses of their latest-commit checks.

  Writes the open-PR count, the no-checks counts, and the per-check status
  counts as metrics, then returns an HTML summary string.

  :arg mon_state: MonitorState instance used to stash the open-PR list
  """
logging.info("Finding statuses for open PRs")
gh_token = _get_github_token()
if not gh_token:
return "Failed to load github token"
github = Github(gh_token)
wpt_repo = github.get_repo("web-platform-tests/wpt")
# NOTE: mon_state is redundant here since we set and use it in the same
# function. This is a throwback to when counting PRs and statuses were in
# two methods. Leaving it here for now since this might be split again later.
mon_state.github_open_prs = wpt_repo.get_pulls(state="open", base="master",
sort="updated", direction="desc")
# We require that open PRs have already been identified.
# TODO: make this capable of rebuilding open PRs list if needed
assert mon_state.github_open_prs is not None
num_open_prs = 0
# Status types is a 2-level dict mapping context->state->count, making it a
# dict(str->dict(str->int))
status_dict = defaultdict(lambda: defaultdict(int))
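  # Illustrative entry (hypothetical check name):
  #   status_dict["wpt-firefox-nightly-results"]["success"] == 4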
# Keep track of the PR numbers that have no checks.
prs_with_no_checks = []
draft_prs_with_no_checks = []
for open_pr in mon_state.github_open_prs:
# The timestamps reported by github are in UTC, so add the tzinfo.
pr_update_time_utc = open_pr.updated_at.replace(tzinfo=timezone.utc)
pr_age_seconds = (datetime.now(tz=timezone.utc) - pr_update_time_utc).total_seconds()
if pr_age_seconds >= OPEN_PRS_MAX_AGE_SECONDS:
logging.info("PR %d last updated on %s which exceeds the max, skipping the rest"
% (open_pr.number, open_pr.updated_at.strftime("%c%Z")))
break
num_open_prs += 1
# Commits are listed in order of increasing timestamp, meaning the last one
# is the most-recent one.
latest_commit = open_pr.get_commits()[open_pr.commits - 1]
# We try to dedupe the statuses for a given commit. This API seems to list
# the history of statuses for this commit, so you often see a "pending" check
# followed by that same check in a "success" state. We only want to keep the
# success in that example.
# Statuses are listed in reverse-chronological order, with the latest at the
# top of the list, so we traverse backwards to follow the timeline of events
# and overwrite status for any check that repeats.
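    # For example (hypothetical check name), a status history of
    #   [("lint", "success"), ("lint", "pending")]   # newest first
    # traversed in reverse leaves deduped_dict["lint"] == "success".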
deduped_dict = defaultdict(str)
for commit_status in latest_commit.get_statuses().reversed:
deduped_dict[commit_status.context] = commit_status.state
logging.info("PR %d has check status %s" % (open_pr.number, deduped_dict))
if not deduped_dict:
# Empty dict means there are no checks on this PR
if open_pr.draft:
draft_prs_with_no_checks.append(open_pr.number)
else:
prs_with_no_checks.append(open_pr.number)
# Add the deduped statuses for this PR to the combined list
for context, state in deduped_dict.items():
status_dict[context][state] += 1
try:
create_and_write_metric_open_prs(num_open_prs)
except g_exceptions.InvalidArgument as e:
logging.error("Failed to update open PRs metric, skipping: %s" % e)
try:
create_and_write_metric_open_pr_no_checks(len(prs_with_no_checks), is_draft=False)
create_and_write_metric_open_pr_no_checks(len(draft_prs_with_no_checks), is_draft=True)
except g_exceptions.InvalidArgument as e:
logging.error("Failed to PRs-with-no-checks metric, skipping: %s" % e)
create_metric_open_pr_statuses()
for context, state_dict in status_dict.items():
for state, count in state_dict.items():
try:
write_metric_open_pr_statuses(context, state, count)
except g_exceptions.InvalidArgument as e:
logging.error("Failed to PR status metric, skipping: %s" % e)
logging.info("Finished counting open PRs: %d" % num_open_prs)
logging.info("Got combined PR statuses, dict=%s", status_dict)
logging.info("PRs with no checks, %s", prs_with_no_checks)
logging.info("Draft PRs with no checks, %s", draft_prs_with_no_checks)
return "Open PRs: %d<br>\nOpen PR statuses: %s<br>\nOpen PRs with no checks: [%d] %s Drafts: [%d] %s" \
% (num_open_prs, status_dict, len(prs_with_no_checks), prs_with_no_checks,
len(draft_prs_with_no_checks), draft_prs_with_no_checks)


@app.route("/")
def get_all_metrics():
mon_state = MonitorState()
result = get_recent_stable_runs()
result += "<br>\n" + get_recent_experimental_runs()
result += "<br>\n" + get_open_pr_statuses(mon_state)
return result


def main(argv):
  # This method is only used when running locally.
  if len(argv) > 2:
    raise SystemExit("Too many command-line arguments.")
if "localhost" in argv:
app.run(host='127.0.0.1', port=8080, debug=True)
return
result = get_all_metrics()
print("Finished main, result below\n%s" % result)


if __name__ == '__main__':
main(sys.argv[1:])
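
# Local usage sketch, following the argv handling in main() above:
#   python3 main.py localhost   # serve the flask app on 127.0.0.1:8080
#   python3 main.py             # collect all metrics once and print the result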