[ci] Comment runtime changes on PRs

This implements apache#12781, but it must wait for apache#12695 to land before it can be merged.
driazati · Sep 16, 2022 · a379ebb · a379ebb
1 parent e037ae4
commit a379ebb
Show file tree

Hide file tree

Showing 6 changed files with 197 additions and 11 deletions.
diff --git a/.github/workflows/pr_comment_bot.yml b/.github/workflows/pr_comment_bot.yml
@@ -31,6 +31,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
+      - name: Setup
+        run: |
+          set -eux
+          python3 -m pip install scipy
       - name: Comment bot comment (pr)
         if: ${{ github.event.number }}
         env:

diff --git a/ci/scripts/__init__.py b/ci/scripts/__init__.py
@@ -16,4 +16,10 @@
 # under the License.
 """Package to enable testing of CI scripts"""
 
-from . import github_skipped_tests_comment, github_pr_comment, github_tag_teams, github_docs_comment
+from . import (
+    github_skipped_tests_comment,
+    github_pr_comment,
+    github_tag_teams,
+    github_docs_comment,
+    github_ci_runtime_bot,
+)
diff --git a/ci/scripts/git_utils.py b/ci/scripts/git_utils.py
@@ -34,24 +34,35 @@ def compress_query(query: str) -> str:
     return query
 
 
-def post(url: str, body: Optional[Any] = None, auth: Optional[Tuple[str, str]] = None):
-    logging.info(f"Requesting POST to", url, "with", body)
+def _request(
+    url: str, method: str, body: Optional[Any] = None, auth: Optional[Tuple[str, str]] = None
+):
+    """Send an HTTP request with the given method and return the raw response body.
+
+    Parameters
+    ----------
+    url : the URL to request
+    method : HTTP method name passed to ``request.Request`` (e.g. "GET", "POST")
+    body : optional payload; when not None it is serialized with ``json.dumps``
+        and sent as UTF-8 encoded JSON
+    auth : optional (user, password) pair sent as an HTTP Basic
+        ``Authorization`` header
+
+    Returns
+    -------
+    Whatever ``response.read()`` yields for the opened request.
+    """
+    # The request (including its body) is echoed to stdout; auth is not printed
+    print(f"Requesting {method} to", url, "with", body)
     headers = {}
-    req = request.Request(url, headers=headers, method="POST")
+    req = request.Request(url, headers=headers, method=method)
     if auth is not None:
         auth_str = base64.b64encode(f"{auth[0]}:{auth[1]}".encode())
         req.add_header("Authorization", f"Basic {auth_str.decode()}")
 
     if body is None:
-        body = ""
+        # No payload: open the request without a data argument
+        with request.urlopen(req) as response:
+            return response.read()
+    else:
+        # Payload present: send it as JSON with explicit content headers
+        req.add_header("Content-Type", "application/json; charset=utf-8")
+        data = json.dumps(body)
+        data = data.encode("utf-8")
+        req.add_header("Content-Length", len(data))
+
+        with request.urlopen(req, data) as response:
+            return response.read()
+
+
+def post(url: str, body: Optional[Any] = None, auth: Optional[Tuple[str, str]] = None):
+    """Issue a POST to ``url`` via ``_request`` and return its result.
+
+    ``body`` and ``auth`` are forwarded unchanged to ``_request``.
+    """
+    return _request(url=url, method="POST", body=body, auth=auth)
 
-    req.add_header("Content-Type", "application/json; charset=utf-8")
-    data = json.dumps(body)
-    data = data.encode("utf-8")
-    req.add_header("Content-Length", len(data))
 
-    with request.urlopen(req, data) as response:
-        return response.read()
+def get(url: str):
+    """Issue a GET to ``url`` via ``_request`` (no body, no auth) and return its result."""
+    return _request(url=url, method="GET")
 
 
 def dry_run_token(is_dry_run: bool) -> Any:

diff --git a/ci/scripts/github_ci_runtime_bot.py b/ci/scripts/github_ci_runtime_bot.py
@@ -0,0 +1,85 @@
+import json
+import statistics
+from typing import List, Dict, Any
+from git_utils import get
+import scipy.stats
+import re
+import logging
+
+
+MAIN_INFO_URL = "https://ci.tlcpack.ai/blue/rest/organizations/jenkins/pipelines/tvm/runs/?branch=main&start=0&limit=26"
+
+
+def find_target_url(pr_head: Dict[str, Any]) -> str:
+    """Return the ``targetUrl`` of the ``tvm-ci/pr-head`` status on a commit.
+
+    ``pr_head`` is indexed as ``pr_head["statusCheckRollup"]["contexts"]["nodes"]``
+    and each node is scanned for a ``context`` equal to ``"tvm-ci/pr-head"``.
+
+    Raises
+    ------
+    RuntimeError
+        If no node carries the ``tvm-ci/pr-head`` context.
+    """
+    for status in pr_head["statusCheckRollup"]["contexts"]["nodes"]:
+        # Nodes without a "context" key are tolerated and simply never match
+        if status.get("context", "") == "tvm-ci/pr-head":
+            return status["targetUrl"]
+
+    raise RuntimeError(f"Unable to find tvm-ci/pr-head status in {pr_head}")
+
+
+def fetch_past_build_times_s() -> List[float]:
+    """Fetch durations, in seconds, of usable builds listed at ``MAIN_INFO_URL``.
+
+    The response is decoded and parsed as JSON, then iterated item by item.
+    Items rejected by ``can_use_build`` are skipped; for the rest,
+    ``durationInMillis`` is converted to seconds.
+
+    Returns
+    -------
+    A list of durations in seconds; empty if no item is usable.
+    """
+    data = get(MAIN_INFO_URL).decode()
+    data = json.loads(data)
+    build_times_s = []
+    logging.info(f"Fetched {len(data)} builds from main")
+    for item in data:
+        # Only look at completed builds
+        # NOTE: can_use_build also rejects builds that are not FINISHED, so the
+        # log message below covers more than strictly "failed" builds
+        if not can_use_build(item):
+            logging.info("Skipping failed build")
+            continue
+
+        duration = item["durationInMillis"]
+        build_times_s.append(duration / 1000.0)
+
+    return build_times_s
+
+
+def can_use_build(build: Dict[str, Any]):
+    """Return True only when the build's ``state`` is FINISHED and its ``result`` is SUCCESS."""
+    return build["state"] == "FINISHED" and build["result"] == "SUCCESS"
+
+
+def fetch_build_time_s(branch: str, build: str) -> float:
+    """Return the duration, in seconds, of one specific build on ``branch``.
+
+    Up to 25 runs are requested for the branch (``start=0&limit=25``) and
+    searched for an item whose ``id`` matches ``build``.
+
+    Raises
+    ------
+    ValueError
+        If ``build`` cannot be converted with ``int``.
+    RuntimeError
+        If the matching build is rejected by ``can_use_build``, or if no item
+        with that id is present in the fetched runs.
+    """
+    # Round-trip through int so the comparison below uses the canonical
+    # decimal form (e.g. without leading zeros)
+    build = int(build)
+    info_url = f"https://ci.tlcpack.ai/blue/rest/organizations/jenkins/pipelines/tvm/runs/?branch={branch}&start=0&limit=25"
+    data = get(info_url).decode()
+    data = json.loads(data)
+
+    for item in data:
+        # Item ids are compared as strings
+        if item["id"] == str(build):
+            if can_use_build(item):
+                return item["durationInMillis"] / 1000.0
+            else:
+                raise RuntimeError(
+                    f"Found build for {branch} with {build} but cannot use it: {item}"
+                )
+
+    raise RuntimeError(f"Unable to find branch {branch} with {build} in {data}")
+
+
+def ci_runtime_comment(pr: Dict[str, Any]) -> str:
+    """Build a PR comment describing how this PR's CI runtime compares to main.
+
+    The PR's Jenkins branch and build number are parsed from the
+    ``tvm-ci/pr-head`` status target URL. The duration of that build is then
+    compared against recent usable builds on main with a one-sample t-test
+    (``scipy.stats.ttest_1samp``); a p-value below 0.05 is reported as a
+    significant change.
+
+    Parameters
+    ----------
+    pr : PR data; the head commit is read from ``pr["commits"]["nodes"][0]["commit"]``
+
+    Returns
+    -------
+    The comment text, or None when main has no usable builds or their mean
+    duration is zero (no baseline to compare against).
+    NOTE(review): the ``-> str`` annotation does not reflect the None return;
+    it is left as-is to avoid requiring a new typing import.
+
+    Raises
+    ------
+    RuntimeError
+        If the status target URL does not contain ``/job/PR-<n>/<build>``, or
+        if one of the helper lookups fails.
+    """
+    pr_head = pr["commits"]["nodes"][0]["commit"]
+    target_url = find_target_url(pr_head)
+    logging.info(f"Got target url {target_url}")
+    m = re.search(r"/job/(PR-\d+)/(\d+)", target_url)
+    if m is None:
+        # Fail with a descriptive error instead of an AttributeError on None
+        raise RuntimeError(f"Unable to parse branch and build from target url {target_url}")
+    branch, build = m.groups()
+
+    logging.info(f"Calculating CI runtime for {branch} with {build}")
+    main_build_times_s = fetch_past_build_times_s()
+    if not main_build_times_s:
+        logging.info("Found no usable builds on main, quitting")
+        return None
+    x = statistics.mean(main_build_times_s)
+    logging.info(f"Sample mean from main: {x}")
+    if x == 0:
+        # A zero baseline makes the percent change undefined (division by zero)
+        logging.info("Mean build time on main is 0, cannot compute a relative change, quitting")
+        return None
+    current_build_time_s = fetch_build_time_s(branch=branch, build=build)
+    build_url = (
+        f"https://ci.tlcpack.ai/blue/organizations/jenkins/tvm/detail/{branch}/{build}/pipeline"
+    )
+    res = scipy.stats.ttest_1samp(main_build_times_s, current_build_time_s)
+    logging.info(f"t-stats: {res}")
+    # Positive means the PR build took longer than the main average
+    change = (current_build_time_s - x) / x * 100.0
+    change = round(change, 2)
+    # A NaN p-value compares False here and is reported as "no significant effect"
+    if res.pvalue < 0.05:
+        return f"This PR **significantly changed [CI runtime]({build_url}): {change}%**"
+    else:
+        return f"This PR had no significant effect on [CI runtime]({build_url}): {change}%"
diff --git a/ci/scripts/github_pr_comment.py b/ci/scripts/github_pr_comment.py
@@ -25,6 +25,7 @@
 from github_skipped_tests_comment import get_skipped_tests_comment
 from github_tag_teams import get_tags
 from github_docs_comment import get_doc_url
+from github_ci_runtime_bot import ci_runtime_comment
 
 PR_QUERY = """
     query ($owner: String!, $name: String!, $number: Int!) {
@@ -128,14 +129,17 @@
         skipped_tests = test_comments["skipped-tests"]
         ccs = test_comments["ccs"]
         docs_info = test_comments["docs"]
+        ci_runtime = test_comments["ci_runtime"]
     else:
         skipped_tests = get_skipped_tests_comment(pr_data, github=github)
         ccs = get_tags(pr_data, github, team_issue=10317)
         docs_info = get_doc_url(pr_data)
+        ci_runtime = ci_runtime_comment(pr_data)
 
     items = {
         "ccs": ccs,
         "skipped-tests": skipped_tests,
         "docs": docs_info,
+        "ci_runtime": ci_runtime,
     }
     commenter.post_items(items=items.items())
diff --git a/tests/python/ci/test_ci.py b/tests/python/ci/test_ci.py
@@ -22,6 +22,7 @@
 import sys
 import logging
 from pathlib import Path
+import unittest.mock
 
 import pytest
 import tvm.testing
@@ -213,6 +214,80 @@ def write_xml_file(root_dir, xml_file, xml_content):
     assert_in(f"with target {target_url}", caplog.text)
 
 
+@tvm.testing.skip_if_wheel_test
+@parameterize_named(
+    # NOTE(review): all main samples are identical here (zero variance); the
+    # expected "significant" outcome relies on how scipy's ttest_1samp treats
+    # that case - confirm across the scipy versions used in CI
+    significant=dict(
+        build_time=12,
+        main_times=[("SUCCESS", 123), ("SUCCESS", 123), ("SUCCESS", 123)],
+        expected_comment="This PR **significantly changed",
+    ),
+    insignificant=dict(
+        build_time=124,
+        main_times=[("SUCCESS", 123), ("SUCCESS", 120), ("SUCCESS", 125)],
+        expected_comment="This PR had no significant effect",
+    ),
+    # No main build has result SUCCESS, so no comment is expected at all
+    main_failures=dict(
+        build_time=124,
+        main_times=[("FAILED", 123), ("FAILED", 120), ("FAILED", 125)],
+        expected_comment=None,
+    ),
+)
+def test_ci_runtime_comment(caplog, build_time, main_times, expected_comment):
+    """
+    Test the CI runtime commenter bot
+    """
+    build_id = 1
+    # The branch (PR-12824) and build number are parsed back out of this URL
+    target_url = f"https://ci.tlcpack.ai/job/tvm/job/PR-12824/{build_id}/display/redirect"
+    commit = {
+        "statusCheckRollup": {
+            "contexts": {
+                "nodes": [
+                    {
+                        "context": "tvm-ci/pr-head",
+                        "targetUrl": target_url,
+                    }
+                ]
+            }
+        }
+    }
+    pr_data = {"commits": {"nodes": [{"commit": commit}]}}
+    # Fake Jenkins run listings for main and for the PR branch
+    main_build_data = [
+        {
+            "state": "FINISHED",
+            "result": result,
+            "durationInMillis": time,
+        }
+        for result, time in main_times
+    ]
+    pr_build_data = [
+        {
+            "id": str(build_id),
+            "state": "FINISHED",
+            "result": "SUCCESS",
+            "durationInMillis": build_time,
+        },
+    ]
+    jenkins_blue_base = "https://ci.tlcpack.ai/blue/rest/organizations/jenkins/pipelines/tvm/runs/?"
+    # Keyed by the exact URLs the bot requests (limit=26 for main, limit=25 for
+    # the PR branch); any other URL raises KeyError in mock_get
+    mock_data = {
+        jenkins_blue_base + "branch=main&start=0&limit=26": main_build_data,
+        jenkins_blue_base + "branch=PR-12824&start=0&limit=25": pr_build_data,
+    }
+
+    def mock_get(x):
+        # Mirror the real get(): return the JSON payload as bytes
+        return json.dumps(mock_data[x]).encode()
+
+    with caplog.at_level(logging.INFO):
+        # Replace the get imported into the bot module so no network is used
+        with unittest.mock.patch(
+            "scripts.github_ci_runtime_bot.get", new_callable=lambda: mock_get
+        ):
+            comment = scripts.github_ci_runtime_bot.ci_runtime_comment(pr_data)
+
+    if expected_comment is None:
+        assert comment is None
+    else:
+        assert_in(expected_comment, comment)
+
+
 @tvm.testing.skip_if_wheel_test
 @parameterize_named(
     doc_link=dict(
@@ -550,6 +625,7 @@ def test_pr_comment(tmpdir_factory, pr_author, comments, expected):
         "ccs": "the cc",
         "docs": "the docs",
         "skipped-tests": "the skipped tests",
+        "ci_runtime": "the ci runtime",
     }
     proc = run_script(
         [