diff --git a/README.md b/README.md index 540ebdda..43b3ca21 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,11 @@ indice= username= password= +[.crucible] +url= +username= +password= + [ocp-server] port=8000 @@ -53,7 +58,7 @@ indice= username= password= ``` -**Note: The below applies only for the elastic search at the moment** +**Note: The below applies only for the elastic search at the moment** If you also have an archived internal instance that keeps track of older data, it can be specified with '.internal' suffix. Example of our `OCP` internal archived instance's configuration. ```toml [ocp.elasticsearch.internal] diff --git a/backend/app/api/api.py b/backend/app/api/api.py index b74b8ad4..c37735dc 100644 --- a/backend/app/api/api.py +++ b/backend/app/api/api.py @@ -1,3 +1,4 @@ +import sys from fastapi import APIRouter from app.api.v1.endpoints.ocp import results @@ -11,6 +12,7 @@ from app.api.v1.endpoints.telco import telcoJobs from app.api.v1.endpoints.telco import telcoGraphs from app.api.v1.endpoints.ocm import ocmJobs +from app.api.v1.endpoints.ilab import ilab router = APIRouter() @@ -39,3 +41,6 @@ # OCM endpoint router.include_router(ocmJobs.router, tags=['ocm']) + +# InstructLab endpoint +router.include_router(router=ilab.router, tags=['ilab']) diff --git a/backend/app/api/v1/endpoints/ilab/ilab.py b/backend/app/api/v1/endpoints/ilab/ilab.py new file mode 100644 index 00000000..b30f0e6a --- /dev/null +++ b/backend/app/api/v1/endpoints/ilab/ilab.py @@ -0,0 +1,857 @@ +"""Access RHEL AI InstructLab performance data through Crucible + +This defines an API to expose and filter performance data from InstructLab +CPT runs via a persistent Crucible controller instance as defined in the +configuration path "ilab.crucible". 
+""" + +from datetime import datetime, timedelta, timezone +from typing import Annotated, Any, Optional + +from fastapi import APIRouter, Depends, Query + +from app.services.crucible_svc import CrucibleService, GraphList, Metric + +router = APIRouter() + + +CONFIGPATH = "ilab.crucible" + + +def example_response(response) -> dict[str, Any]: + return {"content": {"application/json": {"example": response}}} + + +def example_error(message: str) -> dict[str, Any]: + return example_response({"message": message}) + + +async def crucible_svc(): + crucible = None + try: + crucible = CrucibleService(CONFIGPATH) + yield crucible + finally: + if crucible: + await crucible.close() + + +@router.get( + "/api/v1/ilab/runs/filters", + summary="Returns possible filters", + description=( + "Returns a nested JSON object with all parameter and tag filter terms" + ), + responses={ + 200: example_response( + { + "param": { + "model": [ + "/home/models/granite-7b-redhat-lab", + "/home/models/granite-7b-lab/", + "/home/models/Mixtral-8x7B-Instruct-v0.1", + ], + "gpus": ["4"], + "workflow": ["train", "sdg", "train+eval"], + "data-path": [ + "/home/data/training/jun12-phase05.jsonl", + "/home/data/training/knowledge_data.jsonl", + "/home/data/training/jul19-knowledge-26k.jsonl", + "/home/data/jun12-phase05.jsonl", + ], + "nnodes": ["1"], + "train-until": ["checkpoint:1", "complete"], + "save-samples": ["5000", "2500", "10000"], + "deepspeed-cpu-offload-optimizer": ["0", "1"], + "deepspeed-cpu-offload-optimizer-pin-memory": ["0", "1"], + "batch-size": ["4", "8", "16", "12", "0"], + "cpu-offload-optimizer": ["1"], + "cpu-offload-pin-memory": ["1"], + "nproc-per-node": ["4"], + "num-runavg-samples": ["2", "6"], + "num-cpus": ["30"], + }, + "tag": {"topology": ["none"]}, + } + ) + }, +) +async def run_filters(crucible: Annotated[CrucibleService, Depends(crucible_svc)]): + return await crucible.get_run_filters() + + +@router.get( + "/api/v1/ilab/runs", + summary="Returns a list of InstructLab 
runs", + description="Returns a list of runs summary documents.", + responses={ + 200: example_response( + { + "results": [ + { + "benchmark": "ilab", + "email": "rhel-ai-user@example.com", + "id": "bd72561c-cc20-400b-b6f6-d9534a60033a", + "name": '"RHEL-AI User"', + "source": "n42-h01-b01-mx750c.example.com//var/lib/crucible/run/ilab--2024-09-11_19:43:53_UTC--bd72561c-cc20-400b-b6f6-d9534a60033a", + "status": "pass", + "begin_date": "1970-01-01 00:00:00+00:00", + "end_date": "1970-01-01 00:00:00+00:00", + "params": { + "gpus": "4", + "model": "/home/models/Mixtral-8x7B-Instruct-v0.1", + "workflow": "sdg", + }, + "iterations": [ + { + "iteration": 1, + "primary_metric": "ilab::sdg-samples-sec", + "primary_period": "measurement", + "status": "pass", + "params": { + "gpus": "4", + "model": "/home/models/Mixtral-8x7B-Instruct-v0.1", + "workflow": "sdg", + }, + } + ], + "primary_metrics": ["ilab::sdg-samples-sec"], + "tags": {"topology": "none"}, + } + ], + "count": 5, + "total": 21, + "startDate": "2024-08-19 20:42:52.239000+00:00", + "endDate": "2024-09-18 20:42:52.239000+00:00", + } + ), + 400: example_error( + "sort key 'bad' must be one of begin,benchmark,email,end,id,name,source,status" + ), + 422: example_error( + "invalid date format, start_date must be less than end_date" + ), + }, +) +async def runs( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + start_date: Annotated[ + Optional[str], + Query(description="Start time for search", examples=["2020-11-10"]), + ] = None, + end_date: Annotated[ + Optional[str], + Query(description="End time for search", examples=["2020-11-10"]), + ] = None, + filter: Annotated[ + Optional[list[str]], + Query( + description="Filter terms", examples=["tag:name=value", "param:name=value"] + ), + ] = None, + sort: Annotated[ + Optional[list[str]], + Query(description="Sort terms", examples=["begin:asc", "status:desc"]), + ] = None, + size: Annotated[ + Optional[int], Query(description="Number of runs in a page", 
examples=[10]) + ] = None, + offset: Annotated[ + int, + Query(description="Page offset to start", examples=[10]), + ] = 0, +): + if start_date is None and end_date is None: + now = datetime.now(timezone.utc) + start = now - timedelta(days=30) + end = now + else: + start = start_date + end = end_date + return await crucible.get_runs( + start=start, end=end, filter=filter, sort=sort, size=size, offset=offset + ) + + +@router.get( + "/api/v1/ilab/runs/{run}/tags", + summary="Returns the Crucible tags for a run", + description="Returns tags for a specified Run ID.", + responses={ + 200: example_response({"topology": "none"}), + 400: example_error("Parameter error"), + }, +) +async def tags(crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str): + return await crucible.get_tags(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/params", + summary="Returns the InstructLab parameters for a run", + description="Returns params for a specified Run ID by iteration plus common params.", + responses={ + 200: example_response( + { + "9D5AB7D6-510A-11EF-84ED-CCA69E6B5B5B": { + "num-runavg-samples": "2", + "cpu-offload-pin-memory": "1", + "nnodes": "1", + "cpu-offload-optimizer": "1", + "data-path": "/home/data/training/knowledge_data.jsonl", + "model": "/home/models/granite-7b-lab/", + "nproc-per-node": "4", + }, + "common": { + "num-runavg-samples": "2", + "cpu-offload-pin-memory": "1", + "nnodes": "1", + "cpu-offload-optimizer": "1", + "data-path": "/home/data/training/knowledge_data.jsonl", + "model": "/home/models/granite-7b-lab/", + "nproc-per-node": "4", + }, + } + ), + 400: example_error("Parameter error"), + }, +) +async def params(crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str): + return await crucible.get_params(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/iterations", + summary="Returns a list of InstructLab run iterations", + description="Returns a list of iterations for a specified Run ID.", + responses={ + 200: 
example_response( + [ + { + "id": "6B98F650-7139-11EF-BB69-98B53E962BD1", + "num": 2, + "path": None, + "primary-metric": "ilab::sdg-samples-sec", + "primary-period": "measurement", + "status": "pass", + }, + { + "id": "6B99173E-7139-11EF-9434-F8BB3B1B9CFC", + "num": 5, + "path": None, + "primary-metric": "ilab::sdg-samples-sec", + "primary-period": "measurement", + "status": "pass", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def iterations( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +) -> list[dict[str, Any]]: + return await crucible.get_iterations(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/samples", + summary="Returns a list of InstructLab run samples", + description="Returns a list of samples for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "id": "6BBE6872-7139-11EF-BFAA-8569A9399D61", + "num": "1", + "path": None, + "status": "pass", + "iteration": 5, + "primary_metric": "ilab::sdg-samples-sec", + }, + { + "id": "6BACDFA8-7139-11EF-9F33-8185DD5B4869", + "num": "1", + "path": None, + "status": "pass", + "iteration": 2, + "primary_metric": "ilab::sdg-samples-sec", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def run_samples( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +) -> list[dict[str, Any]]: + return await crucible.get_samples(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/periods", + summary="Returns a list of InstructLab run periods", + description="Returns a list of periods for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "begin": "2024-10-29 14:30:56.723000+00:00", + "end": "2024-10-29 14:35:18.939000+00:00", + "id": "DDBF3584-9603-11EF-8B3C-BAA807DC31B7", + "name": "measurement", + "iteration": 1, + "sample": "1", + "is_primary": True, + "primary_metric": "ilab::actual-train-seconds", + "status": "pass", + }, + { + "begin": "1970-01-01 00:00:00+00:00", + "end": "1970-01-01 
00:00:00+00:00", + "id": "DDB7FC92-9603-11EF-8FD6-97CFCD234564", + "name": "phase1", + "iteration": 1, + "sample": "1", + "is_primary": False, + "status": "pass", + }, + { + "begin": "1970-01-01 00:00:00+00:00", + "end": "1970-01-01 00:00:00+00:00", + "id": "DDBBB5B2-9603-11EF-A19F-824975057E5B", + "name": "phase2", + "iteration": 1, + "sample": "1", + "is_primary": False, + "status": "pass", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def run_periods( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +) -> list[dict[str, Any]]: + return await crucible.get_periods(run) + + +@router.get( + "/api/v1/ilab/iterations/{iteration}/samples", + summary="Returns a list of InstructLab iteration samples", + description="Returns a list of samples for a specified iteration ID.", + responses={ + 200: example_response( + [ + { + "id": "DDB759EA-9603-11EF-A714-9033EEFCCE93", + "num": "1", + "path": None, + "status": "pass", + "iteration": 1, + "primary_metric": "ilab::actual-train-seconds", + "primary_period": "measurement", + } + ] + ), + 400: example_error("Parameter error"), + }, +) +async def iteration_samples( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], iteration: str +) -> list[dict[str, Any]]: + return await crucible.get_samples(iteration=iteration) + + +@router.get( + "/api/v1/ilab/runs/{run}/timeline", + summary="Returns the 'timeline' of a run", + description="Describes the sequence of iterations, samples, and periods.", + responses={ + 200: example_response( + { + "run": { + "id": "70d3b53f-c588-49a3-91c2-7fcf3927be7e", + "iterations": [ + { + "id": "BFC16DA6-60C8-11EF-AB10-CF940109872B", + "num": 1, + "path": None, + "primary-metric": "ilab::train-samples-sec", + "primary-period": "measurement", + "status": "pass", + "samples": [ + { + "id": "C021BECC-60C8-11EF-A619-E0BC70D6C320", + "num": "1", + "path": None, + "status": "pass", + "periods": [ + { + "begin": "2024-08-22 19:09:08.642000+00:00", + 
"end": "2024-08-22 20:04:32.889000+00:00", + "id": "C022CDC6-60C8-11EF-BA80-AFE7B4B2692B", + "name": "measurement", + } + ], + } + ], + } + ], + "begin": "2024-08-22 19:09:08.642000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + } + } + ), + 400: example_error("Parameter error"), + }, +) +async def timeline( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +) -> dict[str, Any]: + return await crucible.get_timeline(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/metrics", + summary="Describe the metrics collected for a run", + description="Returns metric labels along with breakout names and values.", + responses={ + 200: example_response( + { + "sar-net::packets-sec": { + "periods": [], + "breakouts": { + "benchmark-name": ["none"], + "benchmark-role": ["none"], + "csid": ["remotehosts-1-sysstat-1"], + "cstype": ["profiler"], + "dev": ["lo", "eno8303", "eno12399", "eno12409"], + "direction": ["rx", "tx"], + "endpoint-label": ["remotehosts-1"], + "engine-id": ["remotehosts-1-sysstat-1"], + "engine-role": ["profiler"], + "engine-type": ["profiler"], + "hosted-by": ["x.example.com"], + "hostname": ["x.example.com"], + "hypervisor-host": ["none"], + "osruntime": ["podman"], + "tool-name": ["sysstat"], + "type": ["virtual", "physical"], + "userenv": ["rhel-ai"], + }, + }, + }, + ), + 400: example_error("Parameter error"), + }, +) +async def metrics( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_metrics_list(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/breakouts/{metric}", + summary="Returns breakout options for a metric", + description="Describes the breakout names and available values for a run.", + responses={ + 200: example_response( + { + "label": "mpstat::Busy-CPU", + "class": ["throughput"], + "type": "Busy-CPU", + "source": "mpstat", + "breakouts": {"num": ["8", "72"], "thread": [0, 1]}, + } + ), + 400: example_error("Metric name not found for run "), + }, +) +async def 
metric_breakouts( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, +) -> dict[str, Any]: + return await crucible.get_metric_breakouts(run, metric, names=name, periods=period) + + +@router.get( + "/api/v1/ilab/runs/{run}/data/{metric}", + summary="Returns metric data collected for a run", + description="Returns data collected for a specified Run ID metric.", + responses={ + 200: example_response( + [ + { + "begin": "2024-08-22 20:04:05.072000+00:00", + "end": "2024-08-22 20:04:19.126000+00:00", + "duration": 14.055, + "value": 9.389257233311497, + }, + { + "begin": "2024-08-22 20:04:19.127000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + "duration": 13.763, + "value": 9.552584444155011, + }, + ] + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_data( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, +) -> list[dict[str, Any]]: + return await crucible.get_metrics_data( + run, metric, names=name, periods=period, aggregate=aggregate + ) + + +@router.post( + 
"/api/v1/ilab/runs/multisummary", + summary="Returns metric data summaries", + description="Returns a statistical summary of metric data", + responses={ + 200: example_response( + [ + { + "count": 1625, + "min": 0.0, + "max": 375.4, + "avg": 2.364492307692308, + "sum": 3842.3, + "sum_of_squares": 773029.4976, + "variance": 470.11963618840235, + "variance_population": 470.11963618840235, + "variance_sampling": 470.4091187230011, + "std_deviation": 21.68224241605103, + "std_deviation_population": 21.68224241605103, + "std_deviation_sampling": 21.68891695597088, + "std_deviation_bounds": { + "upper": 45.72897713979437, + "lower": -40.99999252440975, + "upper_population": 45.72897713979437, + "lower_population": -40.99999252440975, + "upper_sampling": 45.742326219634066, + "lower_sampling": -41.01334160424945, + }, + "aggregate": True, + "metric": "iostat::operations-merged-sec", + "names": None, + "periods": None, + "run": "26ad48c1-fc9c-404d-bccf-d19755ca8a39", + "title": "iostat::operations-merged-sec {run 2}", + } + ] + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_summary_body( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + summaries: list[Metric], +) -> list[dict[str, Any]]: + return await crucible.get_metrics_summary(summaries) + + +@router.get( + "/api/v1/ilab/runs/{run}/summary/{metric}", + summary="Returns a metric data summary for a run", + description="Returns a statistical summary of data collected for a specified Run ID metric.", + responses={ + 200: example_response( + { + "count": 1625, + "min": 0.0, + "max": 375.4, + "avg": 2.364492307692308, + "sum": 3842.3, + "sum_of_squares": 773029.4976, + "variance": 470.11963618840235, + "variance_population": 470.11963618840235, + 
"variance_sampling": 
470.4091187230011, + "std_deviation": 21.68224241605103, + "std_deviation_population": 21.68224241605103, + "std_deviation_sampling": 21.68891695597088, + "std_deviation_bounds": { + "upper": 45.72897713979437, + "lower": -40.99999252440975, + "upper_population": 45.72897713979437, + "lower_population": -40.99999252440975, + "upper_sampling": 45.742326219634066, + "lower_sampling": -41.01334160424945, + }, + "aggregate": True, + "metric": "iostat::operations-merged-sec", + "names": None, + "periods": None, + "run": "26ad48c1-fc9c-404d-bccf-d19755ca8a39", + "title": "iostat::operations-merged-sec {run 2}", + } + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_summary_param( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, +) -> dict[str, Any]: + result = await crucible.get_metrics_summary( + [ + Metric( + run=run, metric=metric, aggregate=aggregate, names=name, periods=period + ) + ] + ) + return result[0] if isinstance(result, list) and len(result) == 1 else result + + +@router.post( + "/api/v1/ilab/runs/multigraph", + summary="Returns overlaid Plotly graph objects", + description="Returns metric data in a form usable by the Plot React component.", + responses={ + 200: example_response( + response={ + "data": [ + { + "x": [ + "2024-09-05 21:50:07+00:00", + "2024-09-05 21:56:37+00:00", + 
"2024-09-05 21:56:37.001000+00:00", + "2024-09-05 21:56:52+00:00", + "2024-09-05 21:56:52.001000+00:00", + "2024-09-05 22:01:52+00:00", + ], + "y": [0.0, 0.0, 0.33, 0.33, 0.0, 0.0], + "name": "iostat::operations-merged-sec [cmd=read,dev=sdb]", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": {"x": "sample timestamp", "y": "samples / second"}, + "yaxis": "y", + }, + { + "x": [ + "2024-09-05 21:50:07+00:00", + "2024-09-05 21:56:37+00:00", + "2024-09-05 21:56:37.001000+00:00", + "2024-09-05 21:56:52+00:00", + "2024-09-05 21:56:52.001000+00:00", + "2024-09-05 22:01:52+00:00", + ], + "y": [0.0, 0.0, 0.33, 0.33, 0.0, 0.0], + "name": "iostat::operations-merged-sec [dev=sdb,cmd=read]", + "type": "scatter", + "mode": "line", + "marker": {"color": "purple"}, + "labels": {"x": "sample timestamp", "y": "samples / second"}, + "yaxis": "y", + }, + ], + "layout": { + "width": "1500", + "yaxis": { + "title": "iostat::operations-merged-sec", + "color": "black", + }, + }, + } + ), + 400: example_error("No matches for ilab::train-samples-sec"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_graph_body( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], graphs: GraphList +): + return await crucible.get_metrics_graph(graphs) + + +@router.get( + "/api/v1/ilab/runs/{run}/graph/{metric}", + summary="Returns a single Plotly graph object for a run", + description="Returns metric data in a form usable by the Plot React component.", + responses={ + 200: example_response( + response={ + "data": [ + { + "x": [ + "2024-09-12 16:49:01+00:00", + "2024-09-12 18:04:31+00:00", + "2024-09-12 18:04:31.001000+00:00", + "2024-09-12 18:04:46+00:00", + "2024-09-12 18:04:46.001000+00:00", + "2024-09-12 18:53:16+00:00", + ], + "y": [0.0, 0.0, 1.4, 1.4, 0.0, 0.0], + "name": 
"iostat::operations-merged-sec [cmd=read,dev=sda]", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + "yaxis": "y", + } + ], + "layout": { + "width": "1500", + "yaxis": { + "title": "iostat::operations-merged-sec", + "color": "black", + }, + }, + } + ), + 400: example_error("No matches for ilab::train-samples-sec"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_graph_param( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, + title: Annotated[Optional[str], Query(description="Title for graph")] = None, +): + return await crucible.get_metrics_graph( + GraphList( + name=metric, + graphs=[ + Metric( + run=run, + metric=metric, + aggregate=aggregate, + names=name, + periods=period, + title=title, + ) + ], + ) + ) diff --git a/backend/app/services/crucible_readme.md b/backend/app/services/crucible_readme.md new file mode 100644 index 00000000..0ac2e71f --- /dev/null +++ b/backend/app/services/crucible_readme.md @@ -0,0 +1,164 @@ +Crucible divides data across a set of OpenSearch (or ElasticSearch) indices, +each with a specific document mapping. CDM index names include a "root" name +(like "run") with a versioned prefix, like "cdmv7dev-run". + +Crucible timestamps are integers in "millisecond-from-the-epoch" format. 
+ +The Crucible CDM hierarchy is roughly: + +- RUN (an instrumented benchmark run) + - TAG (metadata) + - ITERATION (a benchmark interval) + - PARAM (execution parameters) + - SAMPLE + - PERIOD (time range where data is recorded) + - METRIC_DESC (description of a specific recorded metric) + - METRIC_DATA (a specific recorded data point) + +OpenSearch doesn't support the concept of a SQL "join", but many of the indices +contain documents that could be considered a static "join" with parent documents +for convenience. For example, each `iteration` document contains a copy of its +parent `run` document, while the `period` document contains copies of its parent +`sample`, `iteration`, and `run` documents. This means, for example, that it's +possible to make a single query returning all `period` documents for a specific +iteration number of a specific run. + +
+
RUN
this contains the basic information about a performance run, including a + generated UUID, begin and end timestamps, a benchmark name, a user name and + email, the (host/directory) "source" of the indexed data (which is usable on + the controller's local file system), plus host and test harness names.
+
TAG
this contains information about a general purpose "tag" to associate some + arbitrary context with a run, for example software versions, hardware, or + other metadata. This can be considered a SQL JOIN with the run document, + adding a tag UUID, name, and value.
+
ITERATION
this contains basic information about a performance run iteration, + including the iteration UUID, number, the primary (benchmark) metric associated + with the iteration, plus the primary "period" of the iteration, and the + iteration status.
+
PARAM
this defines a key/value pair specifying behavior of the benchmark + script for an iteration. Parameters are iteration-specific, but parameters that + don't vary between iterations are often represented as run parameters.
+
SAMPLE
this contains basic information about a sample of an iteration, + including a sample UUID and sample number, along with a "path" for sample data + and a sample status.
+
PERIOD
this contains basic information about a period during which data is + collected within a sample, including the period UUID, name, and begin and end + timestamps. A set of periods can be "linked" through a "prev_id" field.
+
METRIC_DESC
this contains descriptive data about a specific series + of metric values within a specific period of a run, including the metric UUID, + the metric "class", type, and source, along with a set of "names" (key/value + pairs) defining the metric breakout details that narrow down a specific source and + type. For example source:mpstat, type:Busy-CPU data is broken down by package, cpu, + core, and other breakouts which can be isolated or aggregated for data reporting.
+
METRIC_DATA
this describes a specific data point, sampled over a specified + duration with a fixed begin and end timestamp, plus a floating point value. + Each is tied to a specific metric_desc UUID value. Depending on the varied + semantics of metric_desc breakouts, it's often valid to aggregate these + across a set of related metric_desc IDs, based on source and type, for + example to get aggregate CPU load across all modes, cores, or across all + modes within a core. This service allows arbitrary aggregation within a + given metric source and type, but by default will attempt to direct the + caller to specify a set of breakouts that result in a single metric_desc + ID.
+
+ +The `crucible_svc` allows CPT project APIs to access a Crucible CDM backing +store to find information about runs, tags, params, iterations, samples, +periods, plus various ways to expose and aggregate metric data both for +primary benchmarks and non-periodic tools. + +The `get_runs` API is the primary entry point, returning an object that +supports filtering, sorting, and pagination of the Crucible run data decorated +with useful iteration, tag, and parameter data. + +The metrics data APIs (data, breakouts, summary, and graph) now allow +filtering by the metric "name" data. This allows "drilling down" through +the non-periodic "tool data". For example, IO data is per-disk, CPU +information is broken down by core and package. You can now aggregate +all global data (e.g., total system CPU), or filter by breakout names to +select by CPU, mode (usr, sys, irq), etc. + +For example, to return `Busy-CPU` ("type") graph data from the `mpstat` +("source") tool for system mode on one core, you might query: + +``` +/api/v1/ilab/runs/<run-id>/graph/mpstat::Busy-CPU?name=core=12,package=1,num=77,type=sys +``` + +If you make a `graph`, `data`, or `summary` query that doesn't translate +to a unique metric, and don't select aggregation, you'll get a diagnostic +message identifying possible additional filters. For example, with +`type=sys` removed, that same query will show the available values for +the `type` breakout name: + +``` +{ + "detail": [ + { + "message": "More than one metric (5) probably means you should add filters", + "names": { + "type": [ + "guest", + "irq", + "soft", + "sys", + "usr" + ] + }, + "periods": [] + } + ] +} +``` + +This capability can be used to build an interactive exploratory UI to +allow displaying breakout details. The `get_metrics` API will show all +recorded metrics, along with information about the names and values used in +those. Metrics that show "names" with more than one value will need to be +filtered to produce meaningful summaries or graphs. 
+ +You can instead aggregate metrics across breakouts using the `?aggregate` +query parameter, like `GET /api/v1/ilab/runs/<run-id>/graph/mpstat::Busy-CPU?aggregate` +which will aggregate all CPU busy data for the system. + +Normally you'll want to display data based on sample periods, for example the +primary period of an iteration, using `?period=<period-id>`. This will +implicitly constrain the metric data based on the period ID associated with +the `metric_desc` document *and* the begin/end time period of the selected +periods. Normally, a benchmark will separate iterations because each is +run with a different parameter value, and the default graph labeling will +look for a set of distinct parameters not used by other iterations: for +example, `mpstat::Busy-CPU (batch-size=16)`. + +The `get_breakouts` API can be used to explore the namespace recorded for that +metric in the specified run. For example, + +``` +GET /api/v1/ilab/runs/<run-id>/breakouts/sar-net::packets-sec?name=direction=rx +{ + "label": "sar-net::packets-sec", + "source": "sar-net", + "type": "packets-sec", + "class": [], + "names": { + "dev": [ + "lo", + "eno12409", + "eno12399" + ], + "type": [ + "physical", + "virtual" + ] + } +} +``` + +The `get_filters` API reports all the tag and param filter tags and +values for the runs. These can be used for the `filter` query parameter +on `get_runs` to restrict the set of runs reported; for example, +`/api/v1/ilab/runs?filter=param:workflow=sdg` shows only runs with the param +arg `workflow` set to the value `sdg`. You can search for a subset of the +string value using the operator "~" instead of "=". For example, +`?filter=param:user~user` will match `user` values of "A user" or "The user". 
diff --git a/backend/app/services/crucible_svc.py b/backend/app/services/crucible_svc.py new file mode 100644 index 00000000..cf9e7990 --- /dev/null +++ b/backend/app/services/crucible_svc.py @@ -0,0 +1,2037 @@ +"""Service to pull data from a Crucible CDM OpenSearch data store + +A set of helper methods to enable a project API to easily process data from a +Crucible controller's OpenSearch data backend. + +This includes paginated, filtered, and sorted lists of benchmark runs, along +with access to the associated Crucible documents such as iterations, samples, and +periods. Metric data can be accessed by breakout names, or aggregated by +breakout subsets or collection periods as either raw data points, statistical +aggregate, or Plotly graph format for UI display. +""" + +import time +from collections import defaultdict +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Iterator, Optional, Tuple, Union + +from elasticsearch import AsyncElasticsearch, NotFoundError +from fastapi import HTTPException, status +from pydantic import BaseModel + +from app import config + + +class Metric(BaseModel): + """Describe a single metric to be graphed or summarized + + This represents a JSON object provided by a caller through the + get_multigraph or get_multisummary APIs to describe a specific + metric. + + The default title (if the field is omitted) is the metric label with a + suffix denoting breakout values selected, any unique parameter values + in a selected iteration, and (if multiple runs are selected in any Graph + list) an indication of the run index. For example, + "mpstat::Busy-CPU [core=2,type=usr] (batch-size=16) {run 1}". + + Fields: + run: run ID + metric: the metric label, "ilab::train-samples-sec" + aggregate: True to aggregate unspecified breakouts + color: CSS color string ("green" or "#008000") + names: Lock in breakouts + periods: Select metrics for specific test period(s) + title: Provide a title for the graph. 
# Palette of CSS named colors.
#
# Every entry must be a valid CSS color keyword, since a Metric's color is
# described as a "CSS color string". The magenta keyword is spelled
# "fuchsia"; the former "fuschia" spelling is not a recognized CSS color.
#
# NOTE(review): presumably this is the fallback palette for graphs whose
# Metric omits `color`; the consumer is not visible in this section of the
# file, so confirm before relying on the ordering.
colors = [
    "black",
    "aqua",
    "blue",
    "fuchsia",
    "gray",
    "green",
    "maroon",
    "navy",
    "olive",
    "teal",
    "silver",
    "lightskyblue",
    "mediumspringgreen",
    "mistyrose",
    "darkgoldenrod",
    "cadetblue",
    "chocolate",
    "coral",
    "brown",
    "bisque",
    "deeppink",
    "sienna",
]
+ + For example, `param:name="A string"` + + Args: + term: A filter expression to parse + """ + self.buffer = term + self.context = term + self.offset = 0 + + def _next_token( + self, delimiters: list[str] = [], optional: bool = False + ) -> Tuple[str, Union[str, None]]: + """Extract the next token from an expression + + Tokens may be quoted; the quotes are removed. for example, the two + expressions `'param':"workflow"='"sdg"'` and `param:workflow:sdg` are + identical. + + Args: + delimiters: a list of delimiter characters + optional: whether the terminating delimiter is optional + + Returns: + A tuple consisting of the token and the delimiter (or None if + parsing reached the end of the expression and the delimiter was + optional) + """ + + @dataclass + class Quote: + open: int + quote: str + + quoted: list[Quote] = [] + next_char = None + token = "" + first_quote = None + for o in range(len(self.buffer)): + next_char = self.buffer[o] + if next_char in delimiters and not quoted: + self.buffer = self.buffer[o + 1 :] + self.offset += o + 1 + break + elif next_char in ('"', "'"): + if o == 0: + first_quote = next_char + if quoted and quoted[-1].quote == next_char: + quoted.pop() + else: + quoted.append(Quote(o, next_char)) + token += next_char + else: + next_char = None + if quoted: + q = quoted[-1] + c = self.context + i = q.open + self.offset + annotated = c[:i] + "[" + c[i] + "]" + c[i + 1 :] + raise HTTPException( + status.HTTP_400_BAD_REQUEST, f"Unterminated quote at {annotated!r}" + ) + + # If delimiters are specified, and not optional, then we didn't + # find one, and that's an error. 
class CommonParams:
    """Help with sorting out parameters

    Parameter values are associated with iterations, but often a set of
    parameters is common across all iterations of a run, and that set can
    provide useful context.

    This helps to filter out identical parameters across a set of
    iterations: call `add` once per iteration, then `render` to get the
    parameters that every iteration shares with the same value.
    """

    def __init__(self):
        # Parameters of the first iteration added: the candidate common set.
        self.common: dict[str, Any] = {}
        # Candidate keys found to be missing or different in a later iteration.
        self.omit: set[str] = set()
        # Explicit "first iteration recorded" flag. An empty `common` dict
        # can't serve as this flag, because a first iteration with no
        # parameters would let the *second* iteration seed the candidates,
        # reporting parameters as common that the first iteration lacks.
        self._seeded = False

    def add(self, params: dict[str, Any]):
        """Add a new iteration into the param set

        The first iteration seeds the candidate set. For each subsequent
        iteration, mark every candidate key which doesn't appear in that
        iteration, or which has a different value, to be omitted from the
        merged "common" param set.

        Args:
            params: the param dictionary of an iteration
        """
        if not self._seeded:
            self.common.update(params)
            self._seeded = True
            return
        for k, v in self.common.items():
            if k not in self.omit and (k not in params or v != params[k]):
                self.omit.add(k)

    def render(self) -> dict[str, Any]:
        """Return a new param set with only common params

        Returns:
            A new dict of the candidate parameters not marked for omission
        """
        return {k: v for k, v in self.common.items() if k not in self.omit}
+ RUN_FILTERS = ("benchmark", "email", "name", "source", "harness", "host") + + # Define the keywords for sorting. + DIRECTIONS = ("asc", "desc") + FIELDS = ( + "begin", + "benchmark", + "desc", + "email", + "end", + "harness", + "host", + "id", + "name", + "source", + ) + + def __init__(self, configpath: str = "crucible"): + """Initialize a Crucible CDM (OpenSearch) connection. + + Generally the `configpath` should be scoped, like `ilab.crucible` so + that multiple APIs based on access to distinct Crucible controllers can + coexist. + + Initialization includes making an "info" call to confirm and record the + server response. + + Args: + configpath: The Vyper config path (e.g., "ilab.crucible") + """ + self.cfg = config.get_config() + self.user = self.cfg.get(configpath + ".username") + self.password = self.cfg.get(configpath + ".password") + self.auth = (self.user, self.password) if self.user or self.password else None + self.url = self.cfg.get(configpath + ".url") + self.elastic = AsyncElasticsearch(self.url, basic_auth=self.auth) + + @staticmethod + def _get_index(root: str) -> str: + return "cdmv7dev-" + root + + @staticmethod + def _split_list(alist: Optional[list[str]] = None) -> list[str]: + """Split a list of parameters + + For simplicity, the APIs supporting "list" query parameters allow + each element in the list to be a comma-separated list of strings. + For example, ["a", "b", "c"] is logically the same as ["a,b,c"]. + + This method normalizes the second form into first to simplify life for + consumers. + + Args: + alist: list of names or name lists + + Returns: + A simple list of options + """ + l: list[str] = [] + if alist: + for n in alist: + l.extend(n.split(",")) + return l + + @staticmethod + def _normalize_date(value: Optional[Union[int, str, datetime]]) -> int: + """Normalize date parameters + + The Crucible data model stores dates as string representations of an + integer "millseconds-from-epoch" value. 
To allow flexibility, this + Crucible service allows incoming dates to be specified as ISO-format + strings, as integers, or as the stringified integer. + + That is, "2024-09-12 18:29:35.123000+00:00", "1726165775123", and + 1726165775123 are identical. + + Args: + value: Representation of a date-time value + + Returns: + The integer milliseconds-from-epoch equivalent + """ + try: + if isinstance(value, int): + return value + elif isinstance(value, datetime): + return int(value.timestamp() * 1000.0) + elif isinstance(value, str): + try: + return int(value) + except ValueError: + pass + try: + d = datetime.fromisoformat(value) + return int(d.timestamp() * 1000.0) + except ValueError: + pass + except Exception as e: + print(f"normalizing {type(value).__name__} {value} failed with {str(e)}") + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Date representation {value} is not a date string or timestamp", + ) + + @staticmethod + def _hits( + payload: dict[str, Any], fields: Optional[list[str]] = None + ) -> Iterator[dict[str, Any]]: + """Helper to iterate through OpenSearch query matches + + Iteratively yields the "_source" of each hit. As a convenience, can + yield a sub-object of "_source" ... 
for example, specifying the + optional "fields" as ["metric_desc", "id"] will yield the equivalent of + hit["_source"]["metric_desc"]["id"] + + Args: + payload: OpenSearch reponse payload + fields: Optional sub-fields of "_source" + + Returns: + Yields each object from the "greatest hits" list + """ + if "hits" not in payload: + raise HTTPException( + status_code=500, detail=f"Attempt to iterate hits for {payload}" + ) + hits = payload.get("hits", {}).get("hits", []) + for h in hits: + source = h["_source"] + if fields: + for f in fields: + source = source[f] + yield source + + @staticmethod + def _aggs(payload: dict[str, Any], aggregation: str) -> Iterator[dict[str, Any]]: + """Helper to access OpenSearch aggregations + + Iteratively yields the name and value of each aggregation returned + by an OpenSearch query. This can also be used for nested aggregations + by specifying an aggregation object. + + Args: + payload: A JSON dict containing an "aggregations" field + + Returns: + Yields each aggregation from an aggregation bucket list + """ + if "aggregations" not in payload: + raise HTTPException( + status_code=500, + detail=f"Attempt to iterate missing aggregations for {payload}", + ) + aggs = payload["aggregations"] + if aggregation not in aggs: + raise HTTPException( + status_code=500, + detail=f"Attempt to iterate missing aggregation {aggregation} for {payload}", + ) + for agg in aggs[aggregation]["buckets"]: + yield agg + + @staticmethod + def _format_timestamp(timestamp: Union[str, int]) -> str: + """Convert stringified integer milliseconds-from-epoch to ISO date""" + try: + ts = int(timestamp) + except Exception as e: + print(f"ERROR: invalid {timestamp!r}: {str(e)!r}") + ts = 0 + return str(datetime.fromtimestamp(ts / 1000.00, timezone.utc)) + + @classmethod + def _format_data(cls, data: dict[str, Any]) -> dict[str, Any]: + """Helper to format a "metric_data" object + + Crucible stores the date, duration, and value as strings, so this + converts them to 
more useful values. The end timestamp is converted + to an ISO date-time string; the duration and value to floating point + numbers. + + Args: + data: a "metric_data" object + + Returns: + A neatly formatted "metric_data" object + """ + return { + "begin": cls._format_timestamp(data["begin"]), + "end": cls._format_timestamp(data["end"]), + "duration": int(data["duration"]) / 1000, + "value": float(data["value"]), + } + + @classmethod + def _format_period(cls, period: dict[str, Any]) -> dict[str, Any]: + """Helper to format a "period" object + + Crucible stores the date values as stringified integers, so this + converts the begin and end timestamps to ISO date-time strings. + + Args: + period: a "period" object + + Returns: + A neatly formatted "period" object + """ + return { + "begin": cls._format_timestamp(timestamp=period["begin"]), + "end": cls._format_timestamp(period["end"]), + "id": period["id"], + "name": period["name"], + } + + @classmethod + def _build_filter_options(cls, filter: Optional[list[str]] = None) -> Tuple[ + Optional[list[dict[str, Any]]], + Optional[list[dict[str, Any]]], + Optional[list[dict[str, Any]]], + ]: + """Build filter terms for tag and parameter filter terms + + Each term has the form ":". Any term + may be quoted: quotes are stripped and ignored. (This is generally only + useful on the to include spaces.) + + We support three namespaces: + param: Match against param index arg/val + tag: Match against tag index name/val + run: Match against run index fields + + We support two operators: + =: Exact match + ~: Partial match + + Args: + filter: list of filter terms like "param:key=value" + + Returns: + A set of OpenSearch filter object lists to detect missing + and matching documents for params, tags, and run fields. 
For + example, to select param:batch-size=12 results in the + following param filter list: + + [ + {' + dis_max': { + 'queries': [ + { + 'bool': { + 'must': [ + {'term': {'param.arg': 'batch-size'}}, + {'term': {'param.val': '12'}} + ] + } + } + ] + } + } + ] + """ + terms = defaultdict(list) + for term in cls._split_list(filter): + p = Parser(term) + namespace, _ = p._next_token([":"]) + key, operation = p._next_token(["=", "~"]) + value, _ = p._next_token() + if operation == "~": + value = f".*{value}.*" + matcher = "regexp" + else: + matcher = "term" + if namespace in ("param", "tag"): + if namespace == "param": + key_field = "param.arg" + value_field = "param.val" + else: + key_field = "tag.name" + value_field = "tag.val" + terms[namespace].append( + { + "bool": { + "must": [ + {"term": {key_field: key}}, + {matcher: {value_field: value}}, + ] + } + } + ) + elif namespace == "run": + terms[namespace].append({matcher: {f"run.{key}": value}}) + else: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"unknown filter namespace {namespace!r}", + ) + param_filter = None + tag_filter = None + if "param" in terms: + param_filter = [{"dis_max": {"queries": terms["param"]}}] + if "tag" in terms: + tag_filter = [{"dis_max": {"queries": terms["tag"]}}] + return param_filter, tag_filter, terms.get("run") + + @classmethod + def _build_name_filters( + cls, namelist: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Build filter terms for metric breakout names + + for example, "cpu=10" filters for metric data descriptors where the + breakout name "cpu" exists and has a value of 10. 
+ + Args: + namelist: list of possibly comma-separated list values + + Returns: + A list of filters to match breakout terms + """ + names: list[str] = cls._split_list(namelist) + filters = [] + for e in names: + try: + n, v = e.split("=", maxsplit=1) + except ValueError: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, f"Filter item {e} must be '='" + ) + filters.append({"term": {f"metric_desc.names.{n}": v}}) + return filters + + @classmethod + def _build_period_filters( + cls, periodlist: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Build period filters + + Generate metric_desc filter terms to match against a list of period IDs. + + Note that not all metric descriptions are periodic, and we don't want + these filters to exclude them -- so the filter will exclude only + documents that have a period and don't match. (That is, we won't drop + any non-periodic metrics. We expect those to be filtered by timestamp + instead.) + + Args: + period: list of possibly comma-separated period IDs + + Returns: + A filter term that requires a period.id match only for metric_desc + documents with a period. + """ + pl: list[str] = cls._split_list(periodlist) + if pl: + return [ + { + "dis_max": { + "queries": [ + {"bool": {"must_not": {"exists": {"field": "period"}}}}, + {"terms": {"period.id": pl}}, + ] + } + } + ] + else: + return [] + + @classmethod + def _build_metric_filters( + cls, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> list[dict[str, Any]]: + """Helper for filtering metric descriptions + + We normally filter by run, metric "label", and optionally by breakout + names and periods. This encapsulates the filter construction. 
+ + Args: + run: run ID + metric: metric label (ilab::sdg-samples-sec) + names: list of "name=value" filters + periods: list of period IDs + + Returns: + A list of OpenSearch filter expressions + """ + msource, mtype = metric.split("::") + return ( + [ + {"term": {"run.id": run}}, + {"term": {"metric_desc.source": msource}}, + {"term": {"metric_desc.type": mtype}}, + ] + + cls._build_name_filters(names) + + cls._build_period_filters(periods) + ) + + @classmethod + def _build_sort_terms(cls, sorters: Optional[list[str]]) -> list[dict[str, str]]: + """Build sort term list + + Sorters may reference any native `run` index field and must specify + either "asc"(ending) or "desc"(ending) sort order. Any number of + sorters may be combined, like ["name:asc,benchmark:desc", "end:desc"] + + Args: + sorters: list of : sort terms + + Returns: + list of OpenSearch sort terms + """ + if sorters: + sort_terms = [] + for s in sorters: + key, dir = s.split(":", maxsplit=1) + if dir not in cls.DIRECTIONS: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Sort direction {dir!r} must be one of {','.join(DIRECTIONS)}", + ) + if key not in cls.FIELDS: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Sort key {key!r} must be one of {','.join(FIELDS)}", + ) + sort_terms.append({f"run.{key}": dir}) + else: + sort_terms = [{"run.begin": "asc"}] + return sort_terms + + async def _search( + self, index: str, query: Optional[dict[str, Any]] = None, **kwargs + ) -> dict[str, Any]: + """Issue an OpenSearch query + + Args: + index: The "base" CDM index name, e.g., "run", "metric_desc" + query: An OpenSearch query object + kwargs: Additional OpenSearch parameters + + Returns: + The OpenSearch response payload (JSON dict) + """ + idx = self._get_index(index) + start = time.time() + value = await self.elastic.search(index=idx, body=query, **kwargs) + print( + f"QUERY on {idx} took {time.time() - start} seconds, " + f"hits: {value.get('hits', {}).get('total')}" + ) + return value + + 
async def close(self): + """Close the OpenSearch connection""" + if self.elastic: + await self.elastic.close() + self.elastic = None + + async def search( + self, + index: str, + filters: Optional[list[dict[str, Any]]] = None, + aggregations: Optional[dict[str, Any]] = None, + sort: Optional[list[dict[str, str]]] = None, + source: Optional[str] = None, + size: Optional[int] = None, + offset: Optional[int] = None, + **kwargs, + ) -> dict[str, Any]: + """OpenSearch query helper + + Combine index, filters, aggregations, sort, and pagination options + into an OpenSearch query. + + Args: + index: "root" CDM index name ("run", "metric_desc", ...) + filters: list of JSON dict filter terms {"term": {"name": "value}} + aggregations: list of JSON dict aggregations {"name": {"term": "name"}} + sort: list of JSON dict sort terms ("name": "asc") + size: The number of hits to return; defaults to "very large" + offset: The number of hits to skip, for pagination + kwargs: Additional OpenSearch options + + Returns: + The OpenSearch response + """ + f = filters if filters else [] + query = { + "size": self.BIGQUERY if size is None else size, + "query": {"bool": {"filter": f}}, + } + if sort: + query.update({"sort": sort}) + if source: + query.update({"_source": source}) + if offset: + query.update({"from": offset}) + if aggregations: + query.update({"aggs": aggregations}) + return await self._search(index, query, **kwargs) + + async def _get_metric_ids( + self, + run: str, + metric: str, + namelist: Optional[list[str]] = None, + periodlist: Optional[list[str]] = None, + aggregate: bool = False, + ) -> list[str]: + """Generate a list of matching metric_desc IDs + + Given a specific run and metric name, and a set of breakout filters, + returns a list of metric desc IDs that match. 
+ + If a single ID is required to produce a consistent metric, and the + supplied filters produce more than one without aggregation, raise a + 422 HTTP error (UNPROCESSABLE CONTENT) with a response body showing + the unsatisfied breakouts (name and available values). + + Args: + run: run ID + metric: combined metric name (e.g., sar-net::packets-sec) + namelist: a list of breakout filters like "type=physical" + periodlist: a list of period IDs + aggregate: if True, allow multiple metric IDs + + Returns: + A list of matching metric_desc ID value(s) + """ + filters = self._build_metric_filters(run, metric, namelist, periodlist) + metrics = await self.search( + "metric_desc", + filters=filters, + ignore_unavailable=True, + ) + if len(metrics["hits"]["hits"]) < 1: + print(f"No metric descs: filters={filters}") + return [] + ids = [h["metric_desc"]["id"] for h in self._hits(metrics)] + if len(ids) < 2 or aggregate: + return ids + + # If we get here, the client asked for breakout data that doesn't + # resolve to a single metric stream, and didn't specify aggregation. + # Offer some help. + names = defaultdict(set) + periods = set() + response = { + "message": f"More than one metric ({len(ids)}) means " + "you should add breakout filters or aggregate." + } + for m in self._hits(metrics): + if "period" in m: + periods.add(m["period"]["id"]) + for n, v in m["metric_desc"]["names"].items(): + names[n].add(v) + + # We want to help filter a consistent summary, so only show those + # breakout names with more than one value. 
+ response["names"] = {n: sorted(v) for n, v in names.items() if v and len(v) > 1} + response["periods"] = list(periods) + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=response + ) + + async def _build_timestamp_range_filters( + self, periods: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Create a timestamp range filter + + This extracts the begin and end timestamps from the list of periods and + builds a timestamp filter range to select documents on or after the + earliest begin timestamp and on or before the latest end timestamp. + + Args: + periods: a list of CDM period IDs + + Returns: + Constructs a range filter for the earliest begin timestamp and the + latest end timestamp among the specified periods. + """ + + if periods: + ps = self._split_list(periods) + matches = await self.search( + "period", filters=[{"terms": {"period.id": ps}}] + ) + try: + start = min([int(h) for h in self._hits(matches, ["period", "begin"])]) + end = max([int(h) for h in self._hits(matches, ["period", "end"])]) + except Exception as e: + print( + f"At least one of periods in {ps} lacks a begin or end " + f"timestamp ({str(e)!r}): date filtering by period is " + "disabled, which may produce bad results." + ) + return [] + return [ + {"range": {"metric_data.begin": {"gte": str(start)}}}, + {"range": {"metric_data.end": {"lte": str(end)}}}, + ] + else: + return [] + + async def _get_run_ids( + self, index: str, filters: Optional[list[dict[str, Any]]] = None + ) -> set[str]: + """Return a set of run IDs matching a filter + + Documents in the specified index must have "run.id" fields. Returns + a set of unique run IDs matched by the filter in the specified index. 
+ + Args: + index: root CDM index name + filters: a list of OpenSearch filter terms + + Returns: + a set of unique run ID values + """ + filtered = await self.search( + index, source="run.id", filters=filters, ignore_unavailable=True + ) + print(f"HITS: {filtered['hits']['hits']}") + return set([x for x in self._hits(filtered, ["run", "id"])]) + + async def _make_title( + self, + run_id: str, + run_id_list: list[str], + metric_item: Metric, + params_by_run: dict[str, Any], + periods_by_run: dict[str, Any], + ) -> str: + """Compute a default title for a graph + + Use the period, breakout name selections, run list, and iteration + parameters to construct a meaningful name for a metric. + + For example, "ilab::sdg-samples-sec (batch-size=4) {run 1}", or + "mpstat::Busy-CPU [cpu=4]" + + Args: + run_id: the Crucible run ID + run_id_list: ordered list of run IDs in our list of metrics + metric_item: the current MetricItem object + periods: list of aggregation periods, if any + params_by_run: initially empty dict used to cache parameters + periods_by_run: initially empty dict used to cache periods + + Returns: + A string title + """ + names = metric_item.names + metric = metric_item.metric + if metric_item.periods and len(metric_item.periods) == 1: + period = metric_item.periods[0] + else: + period = None + if run_id not in params_by_run: + # Gather iteration parameters outside the loop for help in + # generating useful labels. 
+ all_params = await self.search( + "param", filters=[{"term": {"run.id": run_id}}] + ) + collector = defaultdict(defaultdict) + for h in self._hits(all_params): + collector[h["iteration"]["id"]][h["param"]["arg"]] = h["param"]["val"] + params_by_run[run_id] = collector + else: + collector = params_by_run[run_id] + + if run_id not in periods_by_run: + periods = await self.search( + "period", filters=[{"term": {"run.id": run_id}}] + ) + iteration_periods = defaultdict(list[dict[str, Any]]) + for p in self._hits(periods): + iteration_periods[p["iteration"]["id"]].append(p["period"]) + periods_by_run[run_id] = iteration_periods + else: + iteration_periods = periods_by_run[run_id] + + # We can easily end up with multiple graphs across distinct + # periods or iterations, so we want to be able to provide some + # labeling to the graphs. We do this by looking for unique + # iteration parameters values, since the iteration number and + # period name aren't useful by themselves. + name_suffix = "" + if metric_item.periods: + iteration = None + for i, plist in iteration_periods.items(): + if set(metric_item.periods) <= set([p["id"] for p in plist]): + iteration = i + if period: + for p in plist: + if p["id"] == period: + name_suffix += f" {p['name']}" + + # If the period(s) we're graphing resolve to a single + # iteration in a run with multiple iterations, then we can + # try to find a unique title suffix based on distinct param + # values for that iteration. 
+ if iteration and len(collector) > 1: + unique = collector[iteration].copy() + for i, params in collector.items(): + if i != iteration: + for p in list(unique.keys()): + if p in params and unique[p] == params[p]: + del unique[p] + if unique: + name_suffix += ( + " (" + ",".join([f"{p}={v}" for p, v in unique.items()]) + ")" + ) + + if len(run_id_list) > 1: + name_suffix += f" {{run {run_id_list.index(run_id) + 1}}}" + + options = (" [" + ",".join(names) + "]") if names else "" + return metric + options + name_suffix + + async def get_run_filters(self) -> dict[str, dict[str, list[str]]]: + """Return possible tag and filter terms + + Return a description of tag and param filter terms meaningful + across all datasets. TODO: we should support date-range and benchmark + filtering. Consider supporting all `run` API filtering, which would + allow adjusting the filter popups to drop options no longer relevant + to a given set. + + { + "param": { + {"gpus": [4", "8"]} + } + } + + Returns: + A two-level JSON dict; the first level is the namespace (param or + tag), the second level key is the param/tag/field name and its value + is the set of values defined for that key. 
+ """ + tags = await self.search( + "tag", + size=0, + aggregations={ + "key": { + "terms": {"field": "tag.name", "size": self.BIGQUERY}, + "aggs": { + "values": {"terms": {"field": "tag.val", "size": self.BIGQUERY}} + }, + } + }, + ignore_unavailable=True, + ) + params = await self.search( + "param", + size=0, + aggregations={ + "key": { + "terms": {"field": "param.arg", "size": self.BIGQUERY}, + "aggs": { + "values": { + "terms": {"field": "param.val", "size": self.BIGQUERY} + } + }, + } + }, + ignore_unavailable=True, + ) + aggs = { + k: {"terms": {"field": f"run.{k}", "size": self.BIGQUERY}} + for k in self.RUN_FILTERS + } + runs = await self.search( + "run", + size=0, + aggregations=aggs, + ) + result = defaultdict(lambda: defaultdict(lambda: set())) + for p in self._aggs(params, "key"): + for v in p["values"]["buckets"]: + result["param"][p["key"]].add(v["key"]) + for t in self._aggs(tags, "key"): + for v in t["values"]["buckets"]: + result["tag"][t["key"]].add(v["key"]) + for name in self.RUN_FILTERS: + for f in self._aggs(runs, name): + result["run"][name].add(f["key"]) + return {s: {k: list(v) for k, v in keys.items()} for s, keys in result.items()} + + async def get_runs( + self, + filter: Optional[list[str]] = None, + start: Optional[Union[int, str, datetime]] = None, + end: Optional[Union[int, str, datetime]] = None, + offset: int = 0, + sort: Optional[list[str]] = None, + size: Optional[int] = None, + **kwargs, + ) -> dict[str, Any]: + """Return matching Crucible runs + + Filtered and sorted list of runs. 
+ + { + "sort": [], + "startDate": "2024-01-01T05:00:00+00:00", + "size": 1, + "offset": 0, + "results": [ + { + "begin": "1722878906342", + "benchmark": "ilab", + "email": "A@email", + "end": "1722880503544", + "id": "4e1d2c3c-b01c-4007-a92d-23a561af2c11", + "name": "\"A User\"", + "source": "node.example.com//var/lib/crucible/run/ilab--2024-08-05_17:17:13_UTC--4e1d2c3c-b01c-4007-a92d-23a561af2c11", + "tags": { + "topology": "none" + }, + "iterations": [ + { + "iteration": 1, + "primary_metric": "ilab::train-samples-sec", + "primary_period": "measurement", + "status": "pass", + "params": { + "cpu-offload-pin-memory": "1", + "model": "/home/models/granite-7b-lab/", + "data-path": "/home/data/training/knowledge_data.jsonl", + "cpu-offload-optimizer": "1", + "nnodes": "1", + "nproc-per-node": "4", + "num-runavg-samples": "2" + } + } + ], + "primary_metrics": [ + "ilab::train-samples-sec" + ], + "status": "pass", + "params": { + "cpu-offload-pin-memory": "1", + "model": "/home/models/granite-7b-lab/", + "data-path": "/home/data/training/knowledge_data.jsonl", + "cpu-offload-optimizer": "1", + "nnodes": "1", + "nproc-per-node": "4", + "num-runavg-samples": "2" + }, + "begin_date": "2024-08-05 17:28:26.342000+00:00", + "end_date": "2024-08-05 17:55:03.544000+00:00" + } + ], + "count": 1, + "total": 15, + "next_offset": 1 + } + + Args: + start: Include runs starting at timestamp + end: Include runs ending no later than timestamp + filter: List of tag/param filter terms (parm:key=value) + sort: List of sort terms (column:) + size: Include up to runs in output + offset: Use size/from pagination instead of search_after + + Returns: + JSON object with "results" list and "housekeeping" fields + """ + + # We need to remove runs which don't match against 'tag' or 'param' + # filter terms. The CDM schema doesn't make it possible to do this in + # one shot. 
Instead, we run queries against the param and tag indices + # separately, producing a list of run IDs which we'll exclude from the + # final collection. + # + # If there are no matches, we can exit early. (TODO: should this be an + # error, or just a success with an empty list?) + results = {} + filters = [] + sorters = self._split_list(sort) + results["sort"] = sorters + sort_terms = self._build_sort_terms(sorters) + param_filters, tag_filters, run_filters = self._build_filter_options(filter) + if run_filters: + filters.extend(run_filters) + if start or end: + s = None + e = None + if start: + s = self._normalize_date(start) + results["startDate"] = datetime.fromtimestamp( + s / 1000.0, tz=timezone.utc + ) + if end: + e = self._normalize_date(end) + results["endDate"] = datetime.fromtimestamp(e / 1000.0, tz=timezone.utc) + + if s and e and s > e: + raise HTTPException( + status_code=422, + detail={ + "error": "Invalid date format, start_date must be less than end_date" + }, + ) + cond = {} + if s: + cond["gte"] = str(s) + if e: + cond["lte"] = str(e) + filters.append({"range": {"run.begin": cond}}) + if size: + results["size"] = size + results["offset"] = offset if offset is not None else 0 + + # In order to filter by param or tag values, we need to produce a list + # of matching RUN IDs from each index. We'll then drop any RUN ID that's + # not on both lists. + if tag_filters: + tagids = await self._get_run_ids("tag", tag_filters) + if param_filters: + paramids = await self._get_run_ids("param", param_filters) + + # If it's obvious we can't produce any matches at this point, exit. 
+ if (tag_filters and len(tagids) == 0) or (param_filters and len(paramids) == 0): + results.update({"results": [], "count": 0, "total": 0}) + return results + + hits = await self.search( + "run", + size=size, + offset=offset, + sort=sort_terms, + filters=filters, + **kwargs, + ignore_unavailable=True, + ) + rawiterations = await self.search("iteration", ignore_unavailable=True) + rawtags = await self.search("tag", ignore_unavailable=True) + rawparams = await self.search("param", ignore_unavailable=True) + + iterations = defaultdict(list) + tags = defaultdict(defaultdict) + params = defaultdict(defaultdict) + run_params = defaultdict(list) + + for i in self._hits(rawiterations): + iterations[i["run"]["id"]].append(i["iteration"]) + + # Organize tags by run ID + for t in self._hits(rawtags): + tags[t["run"]["id"]][t["tag"]["name"]] = t["tag"]["val"] + + # Organize params by iteration ID + for p in self._hits(rawparams): + run_params[p["run"]["id"]].append(p) + params[p["iteration"]["id"]][p["param"]["arg"]] = p["param"]["val"] + + runs = {} + for h in self._hits(hits): + run = h["run"] + rid = run["id"] + + # Filter the runs by our tag and param queries + if param_filters and rid not in paramids: + continue + + if tag_filters and rid not in tagids: + continue + + runs[rid] = run + + # Convert string timestamps (milliseconds from epoch) to int + try: + run["begin"] = int(run["begin"]) + run["end"] = int(run["end"]) + except Exception as e: + print( + f"Unexpected error converting timestamp {run['begin']!r} " + f"or {run['end']!r} to int: {str(e)!r}" + ) + run["tags"] = tags.get(rid, {}) + run["iterations"] = [] + run["primary_metrics"] = set() + common = CommonParams() + + # Collect unique iterations: the status is "fail" if any iteration + # for that run ID failed. 
+ for i in iterations.get(rid, []): + iparams = params.get(i["id"], {}) + if "status" not in run: + run["status"] = i["status"] + else: + if i["status"] != "pass": + run["status"] = i["status"] + common.add(iparams) + run["primary_metrics"].add(i["primary-metric"]) + run["iterations"].append( + { + "iteration": i["num"], + "primary_metric": i["primary-metric"], + "primary_period": i["primary-period"], + "status": i["status"], + "params": iparams, + } + ) + run["iterations"].sort(key=lambda i: i["iteration"]) + run["params"] = common.render() + try: + run["begin_date"] = self._format_timestamp(run["begin"]) + run["end_date"] = self._format_timestamp(run["end"]) + except KeyError as e: + print(f"Missing 'run' key {str(e)} in {run}") + run["begin_date"] = self._format_timestamp("0") + run["end_date"] = self._format_timestamp("0") + + count = len(runs) + total = hits["hits"]["total"]["value"] + results.update( + { + "results": list(runs.values()), + "count": count, + "total": total, + } + ) + if size and (offset + count < total): + results["next_offset"] = offset + size + return results + + async def get_tags(self, run: str, **kwargs) -> dict[str, str]: + """Return the set of tags associated with a run + + Args: + run: run ID + + Returns: + JSON dict with "tag" keys showing each value + """ + tags = await self.search( + index="tag", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + return {t["name"]: t["val"] for t in self._hits(tags, ["tag"])} + + async def get_params( + self, run: Optional[str] = None, iteration: Optional[str] = None, **kwargs + ) -> dict[str, dict[str, str]]: + """Return the set of parameters for a run or iteration + + Parameters are technically associated with an iteration, but can be + aggregated for a run. This will return a set of parameters for each + iteration; plus, if a "run" was specified, a filtered list of param + values that are common across all iterations. 
+ + Args: + run: run ID + iteration: iteration ID + kwargs: additional OpenSearch keywords + + Returns: + JSON dict of param values by iteration (plus "common" if by run ID) + """ + if not run and not iteration: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A params query requires either a run or iteration ID", + ) + match = {"run.id" if run else "iteration.id": run if run else iteration} + params = await self.search( + index="param", + filters=[{"term": match}], + **kwargs, + ignore_unavailable=True, + ) + response = defaultdict(defaultdict) + for param in self._hits(params): + iter = param["iteration"]["id"] + arg = param["param"]["arg"] + val = param["param"]["val"] + if response.get(iter) and response.get(iter).get(arg): + print(f"Duplicate param {arg} for iteration {iter}") + response[iter][arg] = val + + # Filter out all parameter values that don't exist in all or which have + # different values. + if run: + common = CommonParams() + for params in response.values(): + common.add(params) + response["common"] = common.render() + return response + + async def get_iterations(self, run: str, **kwargs) -> list[dict[str, Any]]: + """Return a list of iterations for a run + + Args: + run: run ID + kwargs: additional OpenSearch keywords + + Returns: + A list of iteration documents + """ + hits = await self.search( + index="iteration", + filters=[{"term": {"run.id": run}}], + sort=[{"iteration.num": "asc"}], + **kwargs, + ignore_unavailable=True, + ) + + iterations = [] + for i in self._hits(hits, ["iteration"]): + iterations.append( + { + "id": i["id"], + "num": i["num"], + "path": i["path"], + "primary_metric": i["primary-metric"], + "primary_period": i["primary-period"], + "status": i["status"], + } + ) + return iterations + + async def get_samples( + self, run: Optional[str] = None, iteration: Optional[str] = None, **kwargs + ): + """Return a list of samples for a run or iteration + + Args: + run: run ID + iteration: iteration ID + kwargs: additional 
OpenSearch keywords + + Returns: + A list of sample documents. + """ + if not run and not iteration: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A sample query requires either a run or iteration ID", + ) + match = {"run.id" if run else "iteration.id": run if run else iteration} + hits = await self.search( + index="sample", + filters=[{"term": match}], + **kwargs, + ignore_unavailable=True, + ) + samples = [] + for s in self._hits(hits): + print(f"SAMPLE's ITERATION {s['iteration']}") + sample = s["sample"] + sample["iteration"] = s["iteration"]["num"] + sample["primary_metric"] = s["iteration"]["primary-metric"] + sample["primary_period"] = s["iteration"]["primary-period"] + sample["status"] = s["iteration"]["status"] + samples.append(sample) + return samples + + async def get_periods( + self, + run: Optional[str] = None, + iteration: Optional[str] = None, + sample: Optional[str] = None, + **kwargs, + ): + """Return a list of periods associated with a run, an iteration, or a + sample + + The "period" document is normalized to represent timestamps using ISO + strings. 
+ + Args: + run: run ID + iteration: iteration ID + sample: sample ID + kwargs: additional OpenSearch parameters + + Returns: + a list of normalized period documents + """ + if not any((run, iteration, sample)): + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A period query requires a run, iteration, or sample ID", + ) + match = None + if sample: + match = {"sample.id": sample} + elif iteration: + match = {"iteration.id": iteration} + else: + match = {"run.id": run} + periods = await self.search( + index="period", + filters=[{"term": match}], + sort=[{"period.begin": "asc"}], + **kwargs, + ignore_unavailable=True, + ) + body = [] + for h in self._hits(periods): + period = self._format_period(period=h["period"]) + period["iteration"] = h["iteration"]["num"] + period["sample"] = h["sample"]["num"] + is_primary = h["iteration"]["primary-period"] == h["period"]["name"] + period["is_primary"] = is_primary + if is_primary: + period["primary_metric"] = h["iteration"]["primary-metric"] + period["status"] = h["iteration"]["status"] + body.append(period) + return body + + async def get_timeline(self, run: str, **kwargs) -> dict[str, Any]: + """Report the relative timeline of a run + + With nested object lists, show runs to iterations to samples to + periods. 
+ + Args: + run: run ID + kwargs: additional OpenSearch parameters + """ + itr = await self.search( + index="iteration", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + sam = await self.search( + index="sample", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + per = await self.search( + index="period", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + samples = defaultdict(list) + periods = defaultdict(list) + + for s in self._hits(sam): + samples[s["iteration"]["id"]].append(s) + for p in self._hits(per): + periods[p["sample"]["id"]].append(p) + + iterations = [] + robj = {"id": run, "iterations": iterations} + body = {"run": robj} + for i in self._hits(itr): + if "begin" not in robj: + robj["begin"] = self._format_timestamp(i["run"]["begin"]) + robj["end"] = self._format_timestamp(i["run"]["end"]) + iteration = i["iteration"] + iterations.append(iteration) + iteration["samples"] = [] + for s in samples.get(iteration["id"], []): + sample = s["sample"] + sample["periods"] = [] + for pr in periods.get(sample["id"], []): + period = self._format_period(pr["period"]) + sample["periods"].append(period) + iteration["samples"].append(sample) + return body + + async def get_metrics_list(self, run: str, **kwargs) -> dict[str, Any]: + """Return a list of metrics available for a run + + Each run may have multiple performance metrics stored. This API allows + retrieving a sorted list of the metrics available for a given run, with + the "names" selection criteria available for each and, for "periodic" + (benchmark) metrics, the defined periods for which data was gathered. + + { + "ilab::train-samples-sec": { + "periods": [{"id": <id>, "name": "measurement"}], + "breakouts": {"benchmark-group": ["unknown"], ...} + }, + "iostat::avg-queue-length": { + "periods": [], + "breakouts": {"benchmark-group": ["unknown"], ...}, + }, + ... 
+ } + + Args: + run: run ID + + Returns: + Dict of metrics available for the run, keyed by metric label + """ + hits = await self.search( + index="metric_desc", + filters=[{"term": {"run.id": run}}], + ignore_unavailable=True, + **kwargs, + ) + met = {} + for h in self._hits(hits): + desc = h["metric_desc"] + name = desc["source"] + "::" + desc["type"] + if name in met: + record = met[name] + else: + record = {"periods": [], "breakouts": defaultdict(set)} + met[name] = record + if "period" in h: + record["periods"].append(h["period"]["id"]) + for n, v in desc["names"].items(): + record["breakouts"][n].add(v) + return met + + async def get_metric_breakouts( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> dict[str, Any]: + """Help explore available metric breakouts + + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + + Returns: + A description of all breakout names and values, which can be + specified to narrow down metrics returned by the data, summary, and + graph APIs. 
+ + { + "label": "mpstat::Busy-CPU", + "class": [ + "throughput" + ], + "type": "Busy-CPU", + "source": "mpstat", + "breakouts": { + "num": [ + "8", + "72" + ], + "thread": [ + 0, + 1 + ] + } + } + """ + start = time.time() + filters = self._build_metric_filters(run, metric, names, periods) + metric_name = metric + ("" if not names else ("+" + ",".join(names))) + metrics = await self.search( + "metric_desc", + filters=filters, + ignore_unavailable=True, + ) + if len(metrics["hits"]["hits"]) < 1: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Metric name {metric_name} not found for run {run}", + ) + classes = set() + response = {"label": metric, "class": classes} + breakouts = defaultdict(set) + pl = set() + for m in self._hits(metrics): + desc = m["metric_desc"] + response["type"] = desc["type"] + response["source"] = desc["source"] + if desc.get("class"): + classes.add(desc["class"]) + if "period" in m: + pl.add(m["period"]["id"]) + for n, v in desc["names"].items(): + breakouts[n].add(v) + # We want to help filter a consistent summary, so only show those + # names with more than one value. + if len(pl) > 1: + response["periods"] = pl + response["breakouts"] = {n: v for n, v in breakouts.items() if len(v) > 1} + duration = time.time() - start + print(f"Processing took {duration} seconds") + return response + + async def get_metrics_data( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + aggregate: bool = False, + ) -> list[Any]: + """Return a list of metric data + + The "aggregate" option allows aggregating various metrics across + breakout streams and periods: be careful, as this is meaningful only if + the breakout streams are sufficiently related. 
+ + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + aggregate: aggregate multiple metric data streams + + Returns: + A sequence of data samples, showing the aggregate sample along with + the duration and end timestamp of each sample interval. + + [ + { + "begin": "2024-08-22 20:03:23.028000+00:00", + "end": "2024-08-22 20:03:37.127000+00:00", + "duration": 14.1, + "value": 9.35271216694379 + }, + { + "begin": "2024-08-22 20:03:37.128000+00:00", + "end": "2024-08-22 20:03:51.149000+00:00", + "duration": 14.022, + "value": 9.405932330557683 + }, + { + "begin": "2024-08-22 20:03:51.150000+00:00", + "end": "2024-08-22 20:04:05.071000+00:00", + "duration": 13.922, + "value": 9.478773265522682 + } + ] + """ + start = time.time() + ids = await self._get_metric_ids( + run, metric, names, periodlist=periods, aggregate=aggregate + ) + + # If we're searching by periods, filter metric data by the period + # timestamp range rather than just relying on the metric desc IDs as + # we also want to filter non-periodic tool data. 
+ filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(periods)) + + response = [] + if len(ids) > 1: + # Find the minimum sample interval ("stats" also reports the count) + aggdur = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={"duration": {"stats": {"field": "metric_data.duration"}}}, + ) + if aggdur["aggregations"]["duration"]["count"] > 0: + interval = int(aggdur["aggregations"]["duration"]["min"]) + data = await self.search( + index="metric_data", + size=0, + filters=filters, + aggregations={ + "interval": { + "histogram": { + "field": "metric_data.end", + "interval": interval, + }, + "aggs": {"value": {"sum": {"field": "metric_data.value"}}}, + } + }, + ) + for h in self._aggs(data, "interval"): + response.append( + { + "begin": self._format_timestamp(h["key"] - interval), + "end": self._format_timestamp(h["key"]), + "value": h["value"]["value"], + "duration": interval / 1000.0, + } + ) + else: + data = await self.search("metric_data", filters=filters) + for h in self._hits(data, ["metric_data"]): + response.append(self._format_data(h)) + response.sort(key=lambda a: a["end"]) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return response + + async def get_metrics_summary( + self, summaries: list[Metric] + ) -> list[dict[str, Any]]: + """Return a statistical summary of metric data + + Provides a statistical summary of selected data samples. 
+ + Args: + summaries: list of Summary objects to define desired metrics + + Returns: + A statistical summary of the selected metric data + """ + start = time.time() + results = [] + params_by_run = {} + periods_by_run = {} + run_id_list = [] + for s in summaries: + if not s.run: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "each summary request must have a run ID", + ) + if s.run not in run_id_list: + run_id_list.append(s.run) + for summary in summaries: + ids = await self._get_metric_ids( + summary.run, + summary.metric, + summary.names, + periodlist=summary.periods, + aggregate=summary.aggregate, + ) + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(summary.periods)) + data = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={ + "score": {"extended_stats": {"field": "metric_data.value"}} + }, + ) + + # The caller can provide a title for each graph; but, if not, we + # journey down dark overgrown pathways to fabricate a default with + # reasonable context, including unique iteration parameters, + # breakdown selections, and which run provided the data. + if summary.title: + title = summary.title + else: + title = await self._make_title( + summary.run, run_id_list, summary, params_by_run, periods_by_run + ) + + score = data["aggregations"]["score"] + score["aggregate"] = summary.aggregate + score["metric"] = summary.metric + score["names"] = summary.names + score["periods"] = summary.periods + score["run"] = summary.run + score["title"] = title + results.append(score) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return results + + async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: + """Return metrics data for a run + + Each run may have multiple performance metrics stored. 
This API allows + retrieving graphable time-series representation of a metric over the + period of the run, in the format defined by Plotly as configuration + settings plus an x value array and a y value array. + + { + "data": [ + { + "x": [ + "2024-08-27 09:16:27.371000", + ... + ], + "y": [ + 10.23444312132161, + ... + ], + "name": "Metric ilab::train-samples-sec", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": {"x": "sample timestamp", "y": "samples / second"} + } + ] + "layout": { + "width": 1500, + "yaxis": { + "title": "mpstat::Busy-CPU core=2,package=0,num=112,type=usr", + "color": "black" + } + } + } + + Args: + graphdata: A GraphList object + + Returns: + A Plotly object with layout + """ + start = time.time() + graphlist = [] + layout: dict[str, Any] = {"width": "1500"} + axes = {} + yaxis = None + cindex = 0 + params_by_run = {} + periods_by_run = {} + + # Construct a de-duped ordered list of run IDs, starting with the + # default. + run_id_list = [] + for g in graphdata.graphs: + if not g.run: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, "each graph request must have a run ID" + ) + if g.run not in run_id_list: + run_id_list.append(g.run) + + for g in graphdata.graphs: + run_id = g.run + names = g.names + metric: str = g.metric + + # The caller can provide a title for each graph; but, if not, we + # journey down dark overgrown pathways to fabricate a default with + # reasonable context, including unique iteration parameters, + # breakdown selections, and which run provided the data. 
+ if g.title: + title = g.title + else: + title = await self._make_title( + run_id, run_id_list, g, params_by_run, periods_by_run + ) + + ids = await self._get_metric_ids( + run_id, + metric, + names, + periodlist=g.periods, + aggregate=g.aggregate, + ) + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(g.periods)) + y_max = 0.0 + points: list[Point] = [] + + # If we're pulling multiple breakouts, e.g., total CPU across modes + # or cores, we want to aggregate by timestamp interval. Sample + # timestamps don't necessarily align, so the "histogram" aggregation + # normalizes within the interval (based on the minimum actual + # interval duration). + if len(ids) > 1: + # Find the minimum sample interval of the selected metrics + aggdur = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={ + "duration": {"stats": {"field": "metric_data.duration"}} + }, + ) + if aggdur["aggregations"]["duration"]["count"] > 0: + interval = int(aggdur["aggregations"]["duration"]["min"]) + data = await self.search( + index="metric_data", + size=0, + filters=filters, + aggregations={ + "interval": { + "histogram": { + "field": "metric_data.begin", + "interval": interval, + }, + "aggs": { + "value": {"sum": {"field": "metric_data.value"}} + }, + } + }, + ) + for h in self._aggs(data, "interval"): + begin = int(h["key"]) + end = begin + interval - 1 + points.append(Point(begin, end, float(h["value"]["value"]))) + else: + data = await self.search("metric_data", filters=filters) + for h in self._hits(data, ["metric_data"]): + points.append( + Point(int(h["begin"]), int(h["end"]), float(h["value"])) + ) + + # Sort the graph points by timestamp so that Plotly will draw nice + # lines. We graph both the "begin" and "end" timestamp of each + # sample against the value to more clearly show the sampling + # interval. 
+ x = [] + y = [] + + first = None + + for p in sorted(points, key=lambda a: a.begin): + if graphdata.relative: + if not first: + first = p.begin + s = (p.begin - first) / 1000.0 + e = (p.end - first) / 1000.0 + x.extend([s, e]) + else: + x.extend( + [self._format_timestamp(p.begin), self._format_timestamp(p.end)] + ) + y.extend([p.value, p.value]) + y_max = max(y_max, p.value) + + if g.color: + color = g.color + else: + color = colors[cindex] + cindex += 1 + if cindex >= len(colors): + cindex = 0 + graphitem = { + "x": x, + "y": y, + "name": title, + "type": "scatter", + "mode": "line", + "marker": {"color": color}, + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + } + + # Y-axis scaling and labeling is divided by benchmark label; + # so store each we've created to reuse. (E.g., if we graph + # 5 different mpstat::Busy-CPU periods, they'll share a single + # Y axis.) + if metric in axes: + yref = axes[metric] + else: + if yaxis: + name = f"yaxis{yaxis}" + yref = f"y{yaxis}" + yaxis += 1 + layout[name] = { + "title": metric, + "color": color, + "autorange": True, + "anchor": "free", + "autoshift": True, + "overlaying": "y", + } + else: + name = "yaxis" + yref = "y" + yaxis = 2 + layout[name] = { + "title": metric, + "color": color, + } + axes[metric] = yref + graphitem["yaxis"] = yref + graphlist.append(graphitem) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return {"data": graphlist, "layout": layout} diff --git a/backend/skeleton.toml b/backend/skeleton.toml index 81662a55..2aac4574 100644 --- a/backend/skeleton.toml +++ b/backend/skeleton.toml @@ -15,3 +15,8 @@ personal_access_token= url= username= password= + +[crucible] +url= +username= +password= diff --git a/frontend/README.md b/frontend/README.md index 0b01bbaf..99101f03 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -21,21 +21,21 @@ The `utils` directory has all helper/utility scripts. 
#### [`src/reducers`](src/reducers) -Contains functions that manage store via actions +Contains functions that manage store via actions -## Cloning and Running the Application Locally +## Cloning and Running the Application Locally -- Install [Node.js](https://nodejs.org) +- Install [Node.js](https://nodejs.org) - Clone the [CPT Dashboard code](https://github.com/cloud-bulldozer/cpt-dashboard) to a local file system - Install all the npm packages -Type the following command to install all npm packages +Type the following command to install all npm packages ```bash $ npm install ``` -In order to run the application use the following command +In order to run the application use the following command ```bash $ npm run dev @@ -56,12 +56,12 @@ Then, copy the `build` folder to the proper place on the server for deployment. ## Template -This application is based on v5 of PatternFly which is a production-ready UI solution for admin interfaces. For more information regarding the foundation and template of the application, please visit [PatternFly](https://www.patternfly.org/get-started/develop) +This application is based on v5 of PatternFly which is a production-ready UI solution for admin interfaces. 
For more information regarding the foundation and template of the application, please visit [PatternFly](https://www.patternfly.org/get-started/develop) ## Resources -- [Vite](https://vitejs.dev/guide/) +- [Vite](https://vitejs.dev/guide/) -- [ReactJS](https://reactjs.org/) +- [ReactJS](https://reactjs.org/) - [React-Redux](https://github.com/reduxjs/react-redux) diff --git a/frontend/src/App.js b/frontend/src/App.js deleted file mode 100644 index 4b8c6382..00000000 --- a/frontend/src/App.js +++ /dev/null @@ -1,58 +0,0 @@ -import React, {useEffect} from 'react'; -import '@patternfly/react-core/dist/styles/base.css'; - -import { - Page, - PageSection, - PageSectionVariants, -} from '@patternfly/react-core'; -import {fetchOCPJobsData, fetchCPTJobsData, fetchQuayJobsData, fetchTelcoJobsData} from "./store/Actions/ActionCreator"; -import {useDispatch} from "react-redux"; -import {Route, Switch, BrowserRouter as Router} from "react-router-dom"; -import {NavBar} from "./components/NavBar/NavBar"; -import {HomeView} from "./components/Home/HomeView"; -import {OCPHome} from './components/OCP/OCPHome'; -import {QuayHome} from './components/Quay/QuayHome'; -import {TelcoHome} from './components/Telco/TelcoHome'; - - -export const App = () => { - const dispatch = useDispatch() - - useEffect(() => { - const fetchData = async () =>{ - await dispatch(fetchOCPJobsData()) - await dispatch(fetchCPTJobsData()) - await dispatch(fetchQuayJobsData()) - await dispatch(fetchTelcoJobsData()) - } - fetchData() - }, [dispatch]) - - - - - return ( - - } - groupProps={{ - stickyOnBreakpoint: { default: 'top' }, - sticky: 'top' - }} - > - - - - - - - - - - - - ); -}; - -export default App diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index c5f48549..d93c960e 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -5,6 +5,7 @@ import * as APP_ROUTES from "./utils/routeConstants"; import { BrowserRouter, Route, Routes } from "react-router-dom"; import Home from 
"./components/templates/Home"; +import ILab from "./components/templates/ILab"; import MainLayout from "./containers/MainLayout"; import OCP from "./components/templates/OCP"; import Quay from "./components/templates/Quay"; @@ -26,6 +27,7 @@ function App() { } /> } /> } /> + } /> diff --git a/frontend/src/actions/filterActions.js b/frontend/src/actions/filterActions.js index 7f565887..6385b0bc 100644 --- a/frontend/src/actions/filterActions.js +++ b/frontend/src/actions/filterActions.js @@ -1,3 +1,4 @@ +import { fetchILabJobs, setIlabDateFilter } from "./ilabActions"; import { removeCPTAppliedFilters, setCPTAppliedFilters, @@ -76,6 +77,9 @@ export const setDateFilter = (date, key, navigation, currType) => { dispatch(setQuayDateFilter(date, key, navigation)); } else if (currType === "telco") { dispatch(setTelcoDateFilter(date, key, navigation)); + } else if (currType === "ilab") { + dispatch(setIlabDateFilter(date, key, navigation)); + dispatch(fetchILabJobs(true)); } }; diff --git a/frontend/src/actions/ilabActions.js b/frontend/src/actions/ilabActions.js new file mode 100644 index 00000000..bca6bdd3 --- /dev/null +++ b/frontend/src/actions/ilabActions.js @@ -0,0 +1,402 @@ +import * as API_ROUTES from "@/utils/apiConstants"; +import * as TYPES from "./types.js"; + +import API from "@/utils/axiosInstance"; +import { appendQueryString } from "@/utils/helper"; +import { cloneDeep } from "lodash"; +import { showFailureToast } from "@/actions/toastActions"; + +export const fetchILabJobs = + (shouldStartFresh = false) => + async (dispatch, getState) => { + try { + dispatch({ type: TYPES.LOADING }); + const { start_date, end_date, size, offset, results } = getState().ilab; + const response = await API.get(API_ROUTES.ILABS_JOBS_API_V1, { + params: { + ...(start_date && { start_date }), + ...(end_date && { end_date }), + ...(size && { size }), + ...(offset && { offset }), + }, + }); + if (response.status === 200 && response?.data?.results.length > 0) { + const startDate = 
response.data.startDate, + endDate = response.data.endDate; + dispatch({ + type: TYPES.SET_ILAB_JOBS_DATA, + payload: shouldStartFresh + ? response.data.results + : [...results, ...response.data.results], + }); + + dispatch({ + type: TYPES.SET_ILAB_DATE_FILTER, + payload: { + start_date: startDate, + end_date: endDate, + }, + }); + + dispatch({ + type: TYPES.SET_ILAB_TOTAL_ITEMS, + payload: response.data.total, + }); + dispatch({ + type: TYPES.SET_ILAB_OFFSET, + payload: response.data.next_offset, + }); + + dispatch(tableReCalcValues()); + } + } catch (error) { + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); + }; +export const sliceIlabTableRows = + (startIdx, endIdx) => (dispatch, getState) => { + const results = [...getState().ilab.results]; + + dispatch({ + type: TYPES.SET_ILAB_INIT_JOBS, + payload: results.slice(startIdx, endIdx), + }); + }; +export const setIlabDateFilter = + (start_date, end_date, navigate) => (dispatch, getState) => { + const appliedFilters = getState().ilab.appliedFilters; + + dispatch({ + type: TYPES.SET_ILAB_DATE_FILTER, + payload: { + start_date, + end_date, + }, + }); + + appendQueryString({ ...appliedFilters, start_date, end_date }, navigate); + }; + +export const fetchMetricsInfo = (uid) => async (dispatch) => { + try { + dispatch({ type: TYPES.LOADING }); + const response = await API.get(`/api/v1/ilab/runs/${uid}/metrics`); + if (response.status === 200) { + if ( + response.data.constructor === Object && + Object.keys(response.data).length > 0 + ) { + dispatch({ + type: TYPES.SET_ILAB_METRICS, + payload: { uid, metrics: Object.keys(response.data) }, + }); + } + } + } catch (error) { + console.error(error); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); +}; + +export const fetchPeriods = (uid) => async (dispatch) => { + try { + dispatch({ type: TYPES.LOADING }); + const response = await API.get(`/api/v1/ilab/runs/${uid}/periods`); + if (response.status === 200) { + dispatch({ + 
type: TYPES.SET_ILAB_PERIODS, + payload: { uid, periods: response.data }, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); +}; + +export const fetchSummaryData = + (uid, metric = null) => + async (dispatch, getState) => { + try { + const periods = getState().ilab.periods.find((i) => i.uid == uid); + const metrics = getState().ilab.metrics_selected[uid]; + dispatch({ type: TYPES.SET_ILAB_SUMMARY_LOADING }); + let summaries = []; + periods?.periods?.forEach((p) => { + if (p.is_primary) { + summaries.push({ + run: uid, + metric: p.primary_metric, + periods: [p.id], + }); + } + if (metric) { + summaries.push({ + run: uid, + metric, + aggregate: true, + periods: [p.id], + }); + } + }); + const response = await API.post( + `/api/v1/ilab/runs/multisummary`, + summaries + ); + if (response.status === 200) { + dispatch({ + type: TYPES.SET_ILAB_SUMMARY_DATA, + payload: { uid, data: response.data }, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.SET_ILAB_SUMMARY_COMPLETE }); + }; + +export const handleSummaryData = + (uids, metric = null) => + async (dispatch, getState) => { + try { + const periods = getState().ilab.periods; + const pUids = periods.map((i) => i.uid); + const missingPeriods = uids.filter(function (x) { + return pUids.indexOf(x) < 0; + }); + console.log(`Missing periods for ${missingPeriods}`); + await Promise.all( + missingPeriods.map(async (uid) => { + console.log(`Fetching periods for ${uid}`); + await dispatch(fetchPeriods(uid)); // Dispatch each item + }) + ); + await Promise.all( + uids.map(async (uid) => { + console.log(`Fetching summary data for ${uid}`); + await dispatch(fetchSummaryData(uid, metric)); + }) + ); + } catch (error) { + 
console.error(`ERROR: ${JSON.stringify(error)}`); + dispatch(showFailureToast()); + } + }; + +export const fetchGraphData = + (uid, metric = null) => + async (dispatch, getState) => { + try { + const periods = getState().ilab.periods.find((i) => i.uid == uid); + const graphData = cloneDeep(getState().ilab.graphData); + const filterData = graphData.filter((i) => i.uid !== uid); + dispatch({ + type: TYPES.SET_ILAB_GRAPH_DATA, + payload: filterData, + }); + const copyData = cloneDeep(filterData); + dispatch({ type: TYPES.GRAPH_LOADING }); + let graphs = []; + periods?.periods?.forEach((p) => { + if (p.is_primary) { + graphs.push({ run: uid, metric: p.primary_metric, periods: [p.id] }); + } + if (metric) { + graphs.push({ + run: uid, + metric, + aggregate: true, + periods: [p.id], + }); + } + }); + const response = await API.post(`/api/v1/ilab/runs/multigraph`, { + name: `graph ${uid}`, + graphs, + }); + if (response.status === 200) { + response.data.layout["showlegend"] = true; + response.data.layout["responsive"] = "true"; + response.data.layout["autosize"] = "true"; + response.data.layout["legend"] = { + orientation: "h", + xanchor: "left", + yanchor: "top", + y: -0.1, + }; + copyData.push({ + uid, + data: response.data.data, + layout: response.data.layout, + }); + dispatch({ + type: TYPES.SET_ILAB_GRAPH_DATA, + payload: copyData, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.GRAPH_COMPLETED }); + }; + +export const handleMultiGraph = (uids) => async (dispatch, getState) => { + try { + const periods = getState().ilab.periods; + const pUids = periods.map((i) => i.uid); + + const missingPeriods = uids.filter(function (x) { + return pUids.indexOf(x) < 0; + }); + + await Promise.all( + missingPeriods.map(async (uid) => { + await dispatch(fetchPeriods(uid)); // Dispatch each item + }) + ); + + 
dispatch(fetchMultiGraphData(uids)); + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } +}; +export const fetchMultiGraphData = (uids) => async (dispatch, getState) => { + try { + dispatch({ type: TYPES.LOADING }); + const periods = getState().ilab.periods; + const filterPeriods = periods.filter((item) => uids.includes(item.uid)); + + let graphs = []; + uids.forEach(async (uid) => { + const periods = filterPeriods.find((i) => i.uid == uid); + periods?.periods?.forEach((p) => { + if (p.is_primary) { + graphs.push({ + run: uid, + metric: p.primary_metric, + periods: [p.id], + }); + } + // graphs.push({ + // run: uid, + // metric, + // aggregate: true, + // periods: [p.id], + // }); + }); + }); + console.log(graphs); + const response = await API.post(`/api/v1/ilab/runs/multigraph`, { + name: "comparison", + relative: true, + graphs, + }); + if (response.status === 200) { + response.data.layout["showlegend"] = true; + response.data.layout["responsive"] = "true"; + response.data.layout["autosize"] = "true"; + response.data.layout["legend"] = { + orientation: "h", + xanchor: "left", + yanchor: "top", + }; + const graphData = []; + graphData.push({ + data: response.data.data, + layout: response.data.layout, + }); + dispatch({ + type: TYPES.SET_ILAB_MULTIGRAPH_DATA, + payload: graphData, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); +}; + +export const setIlabPage = (pageNo) => ({ + type: TYPES.SET_ILAB_PAGE, + payload: pageNo, +}); + +export const setIlabPageOptions = (page, perPage) => ({ + type: TYPES.SET_ILAB_PAGE_OPTIONS, + payload: { page, perPage }, +}); + +export const checkIlabJobs = (newPage) => (dispatch, getState) => { + const results = cloneDeep(getState().ilab.results); + const { 
totalItems, perPage } = getState().ilab; + + const startIdx = (newPage - 1) * perPage; + const endIdx = newPage * perPage; + + if ( + (typeof results[startIdx] === "undefined" || + typeof results[endIdx] === "undefined") && + results.length < totalItems + ) { + dispatch(fetchILabJobs()); + } +}; + +export const setSelectedMetrics = (id, metrics) => (dispatch, getState) => { + const metrics_selected = cloneDeep(getState().ilab.metrics_selected); + metrics_selected[id] = metrics; + dispatch({ + type: TYPES.SET_ILAB_SELECTED_METRICS, + payload: metrics_selected, + }); +}; + +export const tableReCalcValues = () => (dispatch, getState) => { + const { page, perPage } = getState().ilab; + + const startIdx = page !== 1 ? (page - 1) * perPage : 0; + const endIdx = page !== 1 ? page * perPage - 1 : perPage; + dispatch(sliceIlabTableRows(startIdx, endIdx)); + dispatch(getMetaRowdId()); +}; + +export const getMetaRowdId = () => (dispatch, getState) => { + const tableData = getState().ilab.tableData; + const metaId = tableData.map((item) => `metadata-toggle-${item.id}`); + dispatch(setMetaRowExpanded(metaId)); +}; +export const toggleComparisonSwitch = () => ({ + type: TYPES.TOGGLE_COMPARISON_SWITCH, +}); + +export const setMetaRowExpanded = (expandedItems) => ({ + type: TYPES.SET_EXPANDED_METAROW, + payload: expandedItems, +}); diff --git a/frontend/src/actions/paginationActions.js b/frontend/src/actions/paginationActions.js index 80a7dff1..1717a82a 100644 --- a/frontend/src/actions/paginationActions.js +++ b/frontend/src/actions/paginationActions.js @@ -3,9 +3,17 @@ import { setCPTPageOptions, sliceCPTTableRows, } from "./homeActions"; +import { + setIlabPage, + setIlabPageOptions, + sliceIlabTableRows, +} from "./ilabActions"; import { setOCPPage, setOCPPageOptions, sliceOCPTableRows } from "./ocpActions"; import { setQuayPage, setQuayPageOptions } from "./quayActions"; import { setTelcoPage, setTelcoPageOptions } from "./telcoActions"; + +import { checkIlabJobs } from 
"./ilabActions"; + export const setPage = (newPage, currType) => (dispatch) => { if (currType === "cpt") { dispatch(setCPTPage(newPage)); @@ -15,6 +23,8 @@ export const setPage = (newPage, currType) => (dispatch) => { dispatch(setQuayPage(newPage)); } else if (currType === "telco") { dispatch(setTelcoPage(newPage)); + } else if (currType === "ilab") { + dispatch(setIlabPage(newPage)); } }; @@ -27,6 +37,8 @@ export const setPageOptions = (newPage, newPerPage, currType) => (dispatch) => { dispatch(setQuayPageOptions(newPage, newPerPage)); } else if (currType === "telco") { dispatch(setTelcoPageOptions(newPage, newPerPage)); + } else if (currType === "ilab") { + dispatch(setIlabPageOptions(newPage, newPerPage)); } }; @@ -35,5 +47,11 @@ export const sliceTableRows = (startIdx, endIdx, currType) => (dispatch) => { dispatch(sliceCPTTableRows(startIdx, endIdx)); } else if (currType === "ocp") { dispatch(sliceOCPTableRows(startIdx, endIdx)); + } else if (currType === "ilab") { + dispatch(sliceIlabTableRows(startIdx, endIdx)); } }; + +export const fetchNextJobs = (newPage) => (dispatch) => { + dispatch(checkIlabJobs(newPage)); +}; diff --git a/frontend/src/actions/types.js b/frontend/src/actions/types.js index 1804cf21..70a67873 100644 --- a/frontend/src/actions/types.js +++ b/frontend/src/actions/types.js @@ -77,3 +77,21 @@ export const SET_TELCO_SELECTED_FILTERS = "SET_TELCO_SELECTED_FILTERS"; export const SET_TELCO_SUMMARY = "SET_TELCO_SUMMARY"; export const SET_TELCO_COLUMNS = "SET_TELCO_COLUMNS"; export const SET_TELCO_GRAPH_DATA = "SET_TELCO_GRAPH_DATA"; +/* ILAB JOBS */ +export const SET_ILAB_JOBS_DATA = "SET_ILAB_JOBS_DATA"; +export const SET_ILAB_DATE_FILTER = "SET_ILAB_DATE_FILTER"; +export const SET_ILAB_GRAPH_DATA = "SET_ILAB_GRAPH_DATA"; +export const SET_ILAB_MULTIGRAPH_DATA = "SET_ILAB_MULTIGRAPH_DATA"; +export const SET_ILAB_SUMMARY_LOADING = "SET_ILAB_SUMMARY_LOADING"; +export const SET_ILAB_SUMMARY_COMPLETE = "SET_ILAB_SUMMARY_COMPLETE"; +export const 
SET_ILAB_SUMMARY_DATA = "SET_ILAB_SUMMARY_DATA"; +export const SET_ILAB_TOTAL_ITEMS = "SET_ILAB_TOTAL_ITEMS"; +export const SET_ILAB_OFFSET = "SET_ILAB_OFFSET"; +export const SET_ILAB_PAGE = "SET_ILAB_PAGE"; +export const SET_ILAB_PAGE_OPTIONS = "SET_ILAB_PAGE_OPTIONS"; +export const SET_ILAB_METRICS = "SET_ILAB_METRICS"; +export const SET_ILAB_SELECTED_METRICS = "SET_ILAB_SELECTED_METRICS"; +export const SET_ILAB_PERIODS = "SET_ILAB_PERIODS"; +export const SET_ILAB_INIT_JOBS = "SET_ILAB_INIT_JOBS"; +export const TOGGLE_COMPARISON_SWITCH = "TOGGLE_COMPARISON_SWITCH"; +export const SET_EXPANDED_METAROW = "SET_EXPANDED_METAROW"; diff --git a/frontend/src/assets/constants/SidemenuConstants.js b/frontend/src/assets/constants/SidemenuConstants.js index bc04fd52..e65a2103 100644 --- a/frontend/src/assets/constants/SidemenuConstants.js +++ b/frontend/src/assets/constants/SidemenuConstants.js @@ -2,3 +2,4 @@ export const HOME_NAV = "home"; export const QUAY_NAV = "quay"; export const OCP_NAV = "ocp"; export const TELCO_NAV = "telco"; +export const ILAB_NAV = "ilab"; diff --git a/frontend/src/components/molecules/ExpandedRow/index.jsx b/frontend/src/components/molecules/ExpandedRow/index.jsx index 981d5660..8fcc2d48 100644 --- a/frontend/src/components/molecules/ExpandedRow/index.jsx +++ b/frontend/src/components/molecules/ExpandedRow/index.jsx @@ -42,7 +42,7 @@ const RowContent = (props) => { }, []); return ( - + {content.map((unit) => ( diff --git a/frontend/src/components/molecules/SideMenuOptions/index.jsx b/frontend/src/components/molecules/SideMenuOptions/index.jsx index 48bed8de..17a00160 100644 --- a/frontend/src/components/molecules/SideMenuOptions/index.jsx +++ b/frontend/src/components/molecules/SideMenuOptions/index.jsx @@ -28,6 +28,11 @@ const sideMenuOptions = [ key: "telco", displayName: "Telco", }, + { + id: CONSTANTS.ILAB_NAV, + key: "ilab", + displayName: "ILAB", + }, ]; const MenuOptions = () => { diff --git 
a/frontend/src/components/organisms/Pagination/index.jsx b/frontend/src/components/organisms/Pagination/index.jsx index 7b316a21..deb8d8fe 100644 --- a/frontend/src/components/organisms/Pagination/index.jsx +++ b/frontend/src/components/organisms/Pagination/index.jsx @@ -1,5 +1,6 @@ import { Pagination, PaginationVariant } from "@patternfly/react-core"; import { + fetchNextJobs, setPage, setPageOptions, sliceTableRows, @@ -13,6 +14,7 @@ const RenderPagination = (props) => { const dispatch = useDispatch(); const perPageOptions = [ + { title: "10", value: 10 }, { title: "25", value: 25 }, { title: "50", value: 50 }, { title: "100", value: 100 }, @@ -21,6 +23,7 @@ const RenderPagination = (props) => { const onSetPage = useCallback( (_evt, newPage, _perPage, startIdx, endIdx) => { dispatch(setPage(newPage, props.type)); + dispatch(sliceTableRows(startIdx, endIdx, props.type)); }, [dispatch, props.type] @@ -28,11 +31,17 @@ const RenderPagination = (props) => { const onPerPageSelect = useCallback( (_evt, newPerPage, newPage, startIdx, endIdx) => { dispatch(setPageOptions(newPage, newPerPage, props.type)); + dispatch(sliceTableRows(startIdx, endIdx, props.type)); }, [dispatch, props.type] ); + const checkAndFetch = (_evt, newPage) => { + if (props.type === "ilab") { + dispatch(fetchNextJobs(newPage)); + } + }; return ( { perPage={props.perPage} page={props.page} variant={PaginationVariant.bottom} + onNextClick={checkAndFetch} perPageOptions={perPageOptions} onSetPage={onSetPage} onPerPageSelect={onPerPageSelect} + onPageInput={checkAndFetch} /> ); }; diff --git a/frontend/src/components/organisms/TableFilters/index.jsx b/frontend/src/components/organisms/TableFilters/index.jsx index c5f5ae62..cec3fee0 100644 --- a/frontend/src/components/organisms/TableFilters/index.jsx +++ b/frontend/src/components/organisms/TableFilters/index.jsx @@ -5,6 +5,7 @@ import "./index.less"; import { Chip, ChipGroup, + Switch, Toolbar, ToolbarContent, ToolbarItem, @@ -39,6 +40,8 @@ const 
TableFilter = (props) => { setColumns, selectedFilters, updateSelectedFilter, + onSwitchChange, + isSwitchChecked, } = props; const category = @@ -66,7 +69,7 @@ const TableFilter = (props) => { setDateFilter(date, key, navigation, type); }; const endDateChangeHandler = (date, key) => { - setDateFilter(key, date, navigation, type); + setDateFilter(date, key, navigation, type); }; return ( @@ -123,8 +126,21 @@ const TableFilter = (props) => { )} + {type === "ilab" && ( + + + + + + )} - {Object.keys(appliedFilters).length > 0 && + {appliedFilters && + Object.keys(appliedFilters).length > 0 && Object.keys(appliedFilters).map((key) => ( {getFilterName(key)} : @@ -153,5 +169,7 @@ TableFilter.propTypes = { selectedFilters: PropTypes.array, updateSelectedFilter: PropTypes.func, navigation: PropTypes.func, + isSwitchChecked: PropTypes.bool, + onSwitchChange: PropTypes.func, }; export default TableFilter; diff --git a/frontend/src/components/organisms/TableFilters/index.less b/frontend/src/components/organisms/TableFilters/index.less index b100a012..1a479703 100644 --- a/frontend/src/components/organisms/TableFilters/index.less +++ b/frontend/src/components/organisms/TableFilters/index.less @@ -11,4 +11,8 @@ .to-text { padding: 5px 0; } + #comparison-switch { + margin-left: auto; + align-content: center; + } } \ No newline at end of file diff --git a/frontend/src/components/templates/ILab/ILabGraph.jsx b/frontend/src/components/templates/ILab/ILabGraph.jsx new file mode 100644 index 00000000..c41300ba --- /dev/null +++ b/frontend/src/components/templates/ILab/ILabGraph.jsx @@ -0,0 +1,44 @@ +import Plot from "react-plotly.js"; +import PropType from "prop-types"; +import { cloneDeep } from "lodash"; +import { uid } from "@/utils/helper"; +import { useSelector } from "react-redux"; + +const ILabGraph = (props) => { + const { item } = props; + const isGraphLoading = useSelector((state) => state.loading.isGraphLoading); + const { graphData } = useSelector((state) => state.ilab); 
+ + const graphDataCopy = cloneDeep(graphData); + + const getGraphData = (id) => { + const data = graphDataCopy?.filter((a) => a.uid === id); + return data; + }; + const hasGraphData = (uuid) => { + const hasData = getGraphData(uuid).length > 0; + + return hasData; + }; + + return ( + <> + {hasGraphData(item.id) ? ( + + ) : isGraphLoading && !hasGraphData(item.id) ? ( +
+ ) : ( + <> + )} + + ); +}; + +ILabGraph.propTypes = { + item: PropType.object, +}; +export default ILabGraph; diff --git a/frontend/src/components/templates/ILab/ILabSummary.jsx b/frontend/src/components/templates/ILab/ILabSummary.jsx new file mode 100644 index 00000000..25526a7d --- /dev/null +++ b/frontend/src/components/templates/ILab/ILabSummary.jsx @@ -0,0 +1,89 @@ +import PropType from "prop-types"; +import { uid } from "@/utils/helper"; +import { useSelector } from "react-redux"; +import { Table, Tbody, Th, Thead, Tr, Td } from "@patternfly/react-table"; + +const ILabSummary = (props) => { + const { ids } = props; + const { isSummaryLoading, summaryData } = useSelector((state) => state.ilab); + + const getSummaryData = (id) => { + const data = summaryData?.find((a) => a.uid === id); + return data; + }; + const hasSummaryData = (ids) => { + const hasData = Boolean( + summaryData.filter((i) => ids.includes(i.uid)).length === ids.length + ); + return hasData; + }; + + return ( +
+ {hasSummaryData(ids) ? ( + + + + {ids.length > 1 ? : <>} + + + + + + + + + {ids.map((id, ridx) => + getSummaryData(id).data.map((stat, sidx) => ( + + {ids.length > 1 && sidx === 0 ? ( + + ) : undefined} + + + + + + + )) + )} + +
RunMetricMinAverageMaxStandard Deviation
{ridx + 1}{stat.title} + {typeof stat.min === "number" + ? stat.min.toPrecision(6) + : stat.min} + + {typeof stat.avg === "number" + ? stat.avg.toPrecision(6) + : stat.avg} + + {typeof stat.max === "number" + ? stat.max.toPrecision(6) + : stat.max} + + {typeof stat.std_deviation === "number" + ? stat.std_deviation.toPrecision(6) + : stat.std_deviation} +
+ ) : isSummaryLoading && !hasSummaryData(ids) ? ( +
+ ) : ( + <> + )} +
+ ); +}; + +ILabSummary.propTypes = { + item: PropType.object, +}; +export default ILabSummary; diff --git a/frontend/src/components/templates/ILab/IlabCompareComponent.jsx b/frontend/src/components/templates/ILab/IlabCompareComponent.jsx new file mode 100644 index 00000000..b2c5eb8f --- /dev/null +++ b/frontend/src/components/templates/ILab/IlabCompareComponent.jsx @@ -0,0 +1,132 @@ +import "./index.less"; + +import { + Button, + Menu, + MenuContent, + MenuItem, + MenuItemAction, + MenuList, + Stack, + StackItem, + Title, +} from "@patternfly/react-core"; +import { useDispatch, useSelector } from "react-redux"; + +import { InfoCircleIcon } from "@patternfly/react-icons"; +import Plot from "react-plotly.js"; +import PropTypes from "prop-types"; +import RenderPagination from "@/components/organisms/Pagination"; +import { cloneDeep } from "lodash"; +import { handleMultiGraph, handleSummaryData } from "@/actions/ilabActions.js"; +import { uid } from "@/utils/helper"; +import { useState } from "react"; +import ILabSummary from "./ILabSummary"; + +const IlabCompareComponent = () => { + const { page, perPage, totalItems, tableData } = useSelector( + (state) => state.ilab + ); + const dispatch = useDispatch(); + const [selectedItems, setSelectedItems] = useState([]); + const { multiGraphData, summaryData, isSummaryLoading } = useSelector( + (state) => state.ilab + ); + const isGraphLoading = useSelector((state) => state.loading.isGraphLoading); + const graphDataCopy = cloneDeep(multiGraphData); + + const onSelect = (_event, itemId) => { + const item = itemId; + if (selectedItems.includes(item)) { + setSelectedItems(selectedItems.filter((id) => id !== item)); + } else { + setSelectedItems([...selectedItems, item]); + } + }; + const dummy = () => { + dispatch(handleSummaryData(selectedItems)); + dispatch(handleMultiGraph(selectedItems)); + }; + return ( +
+
+ + Metrics + + + + + + {tableData.map((item) => { + return ( + } + actionId="code" + onClick={() => console.log("clicked on code icon")} + aria-label="Code" + /> + } + > + {`${new Date(item.begin_date).toLocaleDateString()} ${ + item.primary_metrics[0] + }`} + + ); + })} + + + + +
+ + + {isSummaryLoading ? ( +
+ ) : summaryData.filter((i) => selectedItems.includes(i.uid)).length == + selectedItems.length ? ( + + ) : ( +
No data to summarize
+ )} +
+ + {isGraphLoading ? ( +
+ ) : graphDataCopy?.length > 0 && + graphDataCopy?.[0]?.data?.length > 0 ? ( + + ) : ( +
No data to compare
+ )} +
+
+
+ ); +}; + +IlabCompareComponent.propTypes = { + data: PropTypes.array, +}; +export default IlabCompareComponent; diff --git a/frontend/src/components/templates/ILab/IlabExpandedRow.jsx b/frontend/src/components/templates/ILab/IlabExpandedRow.jsx new file mode 100644 index 00000000..495a88d7 --- /dev/null +++ b/frontend/src/components/templates/ILab/IlabExpandedRow.jsx @@ -0,0 +1,156 @@ +import { + Accordion, + AccordionContent, + AccordionItem, + AccordionToggle, + Card, + CardBody, + Stack, + StackItem, +} from "@patternfly/react-core"; +import { useDispatch, useSelector } from "react-redux"; + +import ILabGraph from "./ILabGraph"; +import MetaRow from "./MetaRow"; +import MetricsSelect from "./MetricsDropdown"; +import ILabSummary from "./ILabSummary"; +import PropTypes from "prop-types"; +import { setMetaRowExpanded } from "@/actions/ilabActions"; +import { uid } from "@/utils/helper"; + +const IlabRowContent = (props) => { + const { item } = props; + const dispatch = useDispatch(); + const { metaRowExpanded } = useSelector((state) => state.ilab); + + const onToggle = (id) => { + const index = metaRowExpanded.indexOf(id); + const newExpanded = + index >= 0 + ? [ + ...metaRowExpanded.slice(0, index), + ...metaRowExpanded.slice(index + 1, metaRowExpanded.length), + ] + : [...metaRowExpanded, id]; + + dispatch(setMetaRowExpanded(newExpanded)); + }; + return ( + + + { + onToggle(`metadata-toggle-${item.id}`); + }} + isExpanded={metaRowExpanded.includes(`metadata-toggle-${item.id}`)} + id={`metadata-toggle-${item.id}`} + > + Metadata + + + +
+ + + + + + + + + + + + + + {item.iterations.length > 1 && ( + + { + onToggle(`iterations-toggle-${item.id}`); + }} + isExpanded={metaRowExpanded.includes( + `iterations-toggle-${item.id}` + )} + id={`iterations-toggle-${item.id}`} + > + {`Unique parameters for ${item.iterations.length} Iterations`} + + + {item.iterations.map((i) => ( + !(i[0] in item.params) + )} + /> + ))} + + + )} + + +
+
+
+ + { + onToggle(`graph-toggle-${item.id}`); + }} + isExpanded={metaRowExpanded.includes(`graph-toggle-${item.id}`)} + id={`graph-toggle-${item.id}`} + > + Metrics & Graph + + +
Metrics:
+ + + + + + + + + +
+
+
+ ); +}; +IlabRowContent.propTypes = { + item: PropTypes.object, +}; +export default IlabRowContent; diff --git a/frontend/src/components/templates/ILab/MetaRow.jsx b/frontend/src/components/templates/ILab/MetaRow.jsx new file mode 100644 index 00000000..c196e79f --- /dev/null +++ b/frontend/src/components/templates/ILab/MetaRow.jsx @@ -0,0 +1,40 @@ +import { Table, Tbody, Th, Thead, Tr } from "@patternfly/react-table"; + +import Proptypes from "prop-types"; +import { Title } from "@patternfly/react-core"; +import { uid } from "@/utils/helper"; + +const MetaRow = (props) => { + const { metadata, heading } = props; + return ( + <> + + {heading} + + + + + + + + + + {metadata.map((item) => ( + + + + + ))} + +
+ Key + Value
{item[0]}{item[1]}
+ + ); +}; + +MetaRow.propTypes = { + heading: Proptypes.string, + metadata: Proptypes.array, +}; +export default MetaRow; diff --git a/frontend/src/components/templates/ILab/MetricsDropdown.jsx b/frontend/src/components/templates/ILab/MetricsDropdown.jsx new file mode 100644 index 00000000..9ebe96cf --- /dev/null +++ b/frontend/src/components/templates/ILab/MetricsDropdown.jsx @@ -0,0 +1,93 @@ +import { + MenuToggle, + Select, + SelectList, + SelectOption, + Skeleton +} from "@patternfly/react-core"; +import { fetchGraphData, fetchSummaryData, setSelectedMetrics } from "@/actions/ilabActions"; +import { useDispatch, useSelector } from "react-redux"; + +import PropTypes from "prop-types"; +import { cloneDeep } from "lodash"; +import { uid } from "@/utils/helper"; +import { useState } from "react"; + +const MetricsSelect = (props) => { + const { metrics, metrics_selected } = useSelector((state) => state.ilab); + const { item } = props; + /* Metrics select */ + const [isOpen, setIsOpen] = useState(false); + const dispatch = useDispatch(); + // const [selected, setSelected] = useState("Select a value"); + + const toggle1 = (toggleRef, selected) => ( + + {selected} + + ); + + const onToggleClick = () => { + setIsOpen(!isOpen); + }; + const onSelect = (_event, value) => { + console.log("selected", value); + const [run, metric] = value; + dispatch(setSelectedMetrics(run, metric)); + dispatch(fetchGraphData(run, metric)); + dispatch(fetchSummaryData(run, metric)); + setIsOpen(false); + }; + const metricsDataCopy = cloneDeep(metrics); + + const getMetricsData = (id) => { + const data = metricsDataCopy?.filter((a) => a.uid === id); + return data; + }; + const hasMetricsData = (uuid) => { + const hasData = getMetricsData(uuid).length > 0; + + return hasData; + }; + /* Metrics select */ + return ( + <> + {hasMetricsData(item.id) ? 
( + + ): + + } + + ); +}; + +MetricsSelect.propTypes = { + item: PropTypes.object, +}; +export default MetricsSelect; diff --git a/frontend/src/components/templates/ILab/StatusCell.jsx b/frontend/src/components/templates/ILab/StatusCell.jsx new file mode 100644 index 00000000..a4bd208f --- /dev/null +++ b/frontend/src/components/templates/ILab/StatusCell.jsx @@ -0,0 +1,24 @@ +import { + CheckCircleIcon, + ExclamationCircleIcon, +} from "@patternfly/react-icons"; + +import { Label } from "@patternfly/react-core"; +import Proptype from "prop-types"; + +const StatusCell = (props) => { + return props.value?.toLowerCase() === "pass" ? ( + + ) : ( + + ); +}; +StatusCell.propTypes = { + value: Proptype.string, +}; + +export default StatusCell; diff --git a/frontend/src/components/templates/ILab/index.jsx b/frontend/src/components/templates/ILab/index.jsx new file mode 100644 index 00000000..d728b44a --- /dev/null +++ b/frontend/src/components/templates/ILab/index.jsx @@ -0,0 +1,164 @@ +import "./index.less"; + +import { + ExpandableRowContent, + Table, + Tbody, + Td, + Th, + Thead, + Tr, +} from "@patternfly/react-table"; +import { + fetchILabJobs, + fetchMetricsInfo, + fetchPeriods, + setIlabDateFilter, + toggleComparisonSwitch, +} from "@/actions/ilabActions"; +import { formatDateTime, uid } from "@/utils/helper"; +import { useDispatch, useSelector } from "react-redux"; +import { useEffect, useState } from "react"; +import { useNavigate, useSearchParams } from "react-router-dom"; + +import IlabCompareComponent from "./IlabCompareComponent"; +import IlabRowContent from "./IlabExpandedRow"; +import RenderPagination from "@/components/organisms/Pagination"; +import StatusCell from "./StatusCell"; +import TableFilter from "@/components/organisms/TableFilters"; + +const ILab = () => { + const dispatch = useDispatch(); + const navigate = useNavigate(); + const [searchParams] = useSearchParams(); + + const { + start_date, + end_date, + comparisonSwitch, + tableData, + page, + 
perPage, + totalItems, + } = useSelector((state) => state.ilab); + const [expandedResult, setExpandedResult] = useState([]); + + const isResultExpanded = (res) => expandedResult?.includes(res); + const setExpanded = async (run, isExpanding = true) => { + setExpandedResult((prevExpanded) => { + const otherExpandedRunNames = prevExpanded.filter((r) => r !== run.id); + return isExpanding + ? [...otherExpandedRunNames, run.id] + : otherExpandedRunNames; + }); + if (isExpanding) { + dispatch(fetchPeriods(run.id)); + dispatch(fetchMetricsInfo(run.id)); + } + }; + + useEffect(() => { + if (searchParams.size > 0) { + // date filter is set apart + const startDate = searchParams.get("start_date"); + const endDate = searchParams.get("end_date"); + + searchParams.delete("start_date"); + searchParams.delete("end_date"); + const params = Object.fromEntries(searchParams); + const obj = {}; + for (const key in params) { + obj[key] = params[key].split(","); + } + dispatch(setIlabDateFilter(startDate, endDate, navigate)); + } + }, []); + + useEffect(() => { + dispatch(fetchILabJobs()); + }, [dispatch]); + + const columnNames = { + benchmark: "Benchmark", + email: "Email", + name: "Name", + source: "Source", + metric: "Metric", + begin_date: "Start Date", + end_date: "End Date", + status: "Status", + }; + + const onSwitchChange = () => { + dispatch(toggleComparisonSwitch()); + }; + return ( + <> + + {comparisonSwitch ? ( + + ) : ( + <> + + + + + + + + + + + {tableData.map((item, rowIndex) => ( + <> + + + + + + + + + + + ))} + +
+ {columnNames.metric}{columnNames.begin_date}{columnNames.end_date}{columnNames.status}
+ setExpanded(item, !isResultExpanded(item.id)), + expandId: `expandId-${uid()}`, + }} + /> + + {item.primary_metrics[0]}{formatDateTime(item.begin_date)}{formatDateTime(item.end_date)} + +
+ + + +
+ + + )} + + ); +}; + +export default ILab; diff --git a/frontend/src/components/templates/ILab/index.less b/frontend/src/components/templates/ILab/index.less new file mode 100644 index 00000000..399c6c77 --- /dev/null +++ b/frontend/src/components/templates/ILab/index.less @@ -0,0 +1,41 @@ +.pf-v5-c-accordion__expandable-content-body { + display: block; +} +.metadata-wrapper { + display: flex; + flex-direction: row; + margin-bottom: 1vw; + .metadata-card { + flex: 1; /* additionally, equal width */ + padding: 1em; + margin-right: 1.5vw; + } +} +.comparison-container { + display: flex; + width: 100%; + height: 80%; + .metrics-container { + width: 40%; + padding: 10px; + .compare-btn { + margin: 2vh 0; + } + .pf-v5-c-menu { + height: 75%; + box-shadow: unset; + } + } + .chart-container { + width: 80%; + .js-plotly-plot { + width: 100%; + height: 100%; + overflow-x: auto; + overflow-y: visible; + } + } + .title { + margin-bottom: 2vh; + } +} diff --git a/frontend/src/reducers/ilabReducer.js b/frontend/src/reducers/ilabReducer.js new file mode 100644 index 00000000..d57c847b --- /dev/null +++ b/frontend/src/reducers/ilabReducer.js @@ -0,0 +1,84 @@ +import * as TYPES from "@/actions/types"; + +const initialState = { + results: [], + start_date: "", + end_date: "", + graphData: [], + multiGraphData: [], + isSummaryLoading: false, + summaryData: [], + totalItems: 0, + page: 1, + perPage: 10, + size: 10, + offset: 0, + metrics: [], + periods: [], + metrics_selected: {}, + tableData: [], + comparisonSwitch: false, + metaRowExpanded: [], +}; +const ILabReducer = (state = initialState, action = {}) => { + const { type, payload } = action; + switch (type) { + case TYPES.SET_ILAB_JOBS_DATA: + return { + ...state, + results: payload, + }; + case TYPES.SET_ILAB_DATE_FILTER: + return { + ...state, + start_date: payload.start_date, + end_date: payload.end_date, + }; + case TYPES.SET_ILAB_TOTAL_ITEMS: + return { + ...state, + totalItems: payload, + }; + case TYPES.SET_ILAB_OFFSET: 
+ return { ...state, offset: payload }; + case TYPES.SET_ILAB_PAGE: + return { ...state, page: payload }; + case TYPES.SET_ILAB_PAGE_OPTIONS: + return { ...state, page: payload.page, perPage: payload.perPage }; + case TYPES.SET_ILAB_METRICS: + return { ...state, metrics: [...state.metrics, payload] }; + case TYPES.SET_ILAB_PERIODS: + return { ...state, periods: [...state.periods, payload] }; + case TYPES.SET_ILAB_SELECTED_METRICS: + return { + ...state, + metrics_selected: payload, + }; + case TYPES.SET_ILAB_GRAPH_DATA: + return { ...state, graphData: payload }; + case TYPES.SET_ILAB_INIT_JOBS: + return { ...state, tableData: payload }; + case TYPES.SET_ILAB_MULTIGRAPH_DATA: + return { ...state, multiGraphData: payload }; + case TYPES.TOGGLE_COMPARISON_SWITCH: + return { ...state, comparisonSwitch: !state.comparisonSwitch }; + case TYPES.SET_EXPANDED_METAROW: + return { ...state, metaRowExpanded: payload }; + case TYPES.SET_ILAB_SUMMARY_LOADING: + return { ...state, isSummaryLoading: true }; + case TYPES.SET_ILAB_SUMMARY_COMPLETE: + return { ...state, isSummaryLoading: false }; + case TYPES.SET_ILAB_SUMMARY_DATA: + return { + ...state, + summaryData: [ + ...state.summaryData.filter((i) => i.uid !== payload.uid), + payload, + ], + }; + default: + return state; + } +}; + +export default ILabReducer; diff --git a/frontend/src/reducers/index.js b/frontend/src/reducers/index.js index 1fb4c555..43970170 100644 --- a/frontend/src/reducers/index.js +++ b/frontend/src/reducers/index.js @@ -1,4 +1,5 @@ import HomeReducer from "./homeReducer"; +import ILabReducer from "./ilabReducer"; import LoadingReducer from "./loadingReducer"; import OCPReducer from "./ocpReducer"; import QuayReducer from "./quayReducer"; @@ -15,4 +16,5 @@ export default combineReducers({ ocp: OCPReducer, quay: QuayReducer, telco: TelcoReducer, + ilab: ILabReducer, }); diff --git a/frontend/src/reducers/loadingReducer.js b/frontend/src/reducers/loadingReducer.js index 496a4e65..52f0c732 100644 --- 
a/frontend/src/reducers/loadingReducer.js +++ b/frontend/src/reducers/loadingReducer.js @@ -7,7 +7,7 @@ import { const initialState = { isLoading: false, - isGraphLoading: true, + isGraphLoading: false, }; const LoadingReducer = (state = initialState, action = {}) => { diff --git a/frontend/src/store/reducers/InitialData.js b/frontend/src/store/reducers/InitialData.js deleted file mode 100644 index 80503b3c..00000000 --- a/frontend/src/store/reducers/InitialData.js +++ /dev/null @@ -1,181 +0,0 @@ - -export const OCP_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - others: 0, - duration:0, - benchmarks: ["All"], - versions: ["All"], - workers: ["All"], - ciSystems: ["All"], - networkTypes: ["All"], - jobTypes: ["All"], - rehearses: ["All"], - allIpsec: ["All"], - allFips: ["All"], - allEncrypted: ["All"], - encryptionTypes: ["All"], - allPublish: ["All"], - computeArchs: ["All"], - controlPlaneArchs: ["All"], - jobStatuses: ["All"], - selectedBenchmark: "All", - selectedVersion: "All", - selectedPlatform: "All", - selectedWorkerCount: "All", - selectedNetworkType: "All", - selectedCiSystem: "All", - selectedJobType: "All", - selectedRehearse: "All", - selectedIpsec: "All", - selectedFips: "All", - selectedEncrypted: "All", - selectedEncryptionType: "All", - selectedPublish: "All", - selectedComputeArch: "All", - selectedControlPlaneArch: "All", - selectedJobStatus: "All", - waitForUpdate: false, - platforms: ["All"], - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{ name: "Benchmark", value: "benchmark" }, - {name:"Release Stream", value: "releaseStream"}, - {name:"Build", value: "build"}, - {name: "Worker Count", value: "workerNodesCount"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Status", value: "jobStatus"}], -} - -export const QUAY_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - 
others: 0, - duration:0, - ciSystems: ["All"], - platforms: ["All"], - benchmarks: ["All"], - releaseStreams: ["All"], - workers: ["All"], - hitSizes: ["All"], - concurrencies: ["All"], - imagePushPulls: ["All"], - selectedCiSystem: "All", - selectedPlatform: "All", - selectedBenchmark: "All", - selectedReleaseStream: "All", - selectedWorkerCount: "All", - selectedHitSize: "All", - selectedConcurrency: "All", - selectedImagePushPulls: "All", - waitForUpdate: false, - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{ name: "Benchmark", value: "benchmark" }, - {name:"Release Stream", value: "releaseStream"}, - {name:"Platform", value: "platform"}, - {name: "Worker Count", value: "workerNodesCount"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Status", value: "jobStatus"}], -} - -export const TELCO_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - others: 0, - duration:0, - ciSystems: ["All"], - benchmarks: ["All"], - versions: ["All"], - releaseStreams: ["All"], - formals: ["All"], - nodeNames: ["All"], - cpus: ["All"], - selectedCiSystem: "All", - selectedBenchmark: "All", - selectedVersion: "All", - selectedReleaseStream: "All", - selectedFormal: "All", - selectedCpu: "All", - selectedNodeName: "All", - waitForUpdate: false, - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{ name: "Benchmark", value: "benchmark" }, - {name:"Release Stream", value: "releaseStream"}, - {name:"Build", value: "ocpVersion"}, - {name:"CPU", value: "cpu"}, - {name:"Node Name", value: "nodeName"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Status", value: "jobStatus"}], -} - -export const CPT_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - others: 0, - testNames: ["All"], - products: ["All"], - 
ciSystems: ["All"], - statuses: ["All"], - releaseStreams: ["All"], - selectedCiSystem: "All", - selectedProduct: "All", - selectedTestName: "All", - selectedJobStatus: "All", - selectedReleaseStream: "All", - waitForUpdate: false, - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{name:"Product", value: "product"}, - { name: "CI System", value: "ciSystem" }, - {name: "Test Name", value: "testName"}, - {name: "Version", value: "version"}, - {name: "Release Stream", value: "releaseStream"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Build URL", value: "buildUrl"}, - {name: "Status", value: "jobStatus"},], -} - -export const GRAPH_INITIAL_DATA = { - uuid_results: {}, - graphError: false, -} - -export const QUAY_GRAPH_INITIAL_DATA = { - uuid_results: {}, - graphError: false, -} - -export const TELCO_GRAPH_INITIAL_DATA = { - uuid_results: {}, - graphError: false, -} diff --git a/frontend/src/store/reducers/index.js b/frontend/src/store/reducers/index.js deleted file mode 100644 index fe4fddad..00000000 --- a/frontend/src/store/reducers/index.js +++ /dev/null @@ -1,18 +0,0 @@ -import ocpJobsReducer from "./OCPJobsReducer"; -import cptJobsReducer from "./CPTJobsReducer"; -import quayJobsReducer from "./QuayJobsReducer"; -import telcoJobsReducer from "./TelcoJobsReducer"; -import graphReducer from "./GraphReducer"; -import quayGraphReducer from "./QuayGraphReducer"; -import telcoGraphReducer from "./TelcoGraphReducer"; - - -export const rootReducer = { - 'ocpJobs': ocpJobsReducer, - 'cptJobs': cptJobsReducer, - 'quayJobs': quayJobsReducer, - 'telcoJobs': telcoJobsReducer, - 'graph': graphReducer, - 'quayGraph': quayGraphReducer, - 'telcoGraph': telcoGraphReducer, -} diff --git a/frontend/src/utils/apiConstants.js b/frontend/src/utils/apiConstants.js index 52576b4a..33fe0ccd 100644 --- a/frontend/src/utils/apiConstants.js +++ 
b/frontend/src/utils/apiConstants.js @@ -1,7 +1,7 @@ export const getUrl = () => { const { hostname, protocol } = window.location; return hostname === "localhost" - ? "http://localhost:8000" + ? "http://127.0.0.1:8000" /* loopback; 0.0.0.0 is a bind-only address, not a portable connect target */ : `${protocol}//${hostname}`; }; @@ -17,3 +17,6 @@ export const QUAY_GRAPH_API_V1 = "/api/v1/quay/graph"; export const TELCO_JOBS_API_V1 = "/api/v1/telco/jobs"; export const TELCO_GRAPH_API_V1 = "/api/v1/telco/graph"; + +export const ILABS_JOBS_API_V1 = "/api/v1/ilab/runs"; +export const ILAB_GRAPH_API_V1 = "/api/v1/ilab/runs/"; diff --git a/frontend/src/utils/routeConstants.js b/frontend/src/utils/routeConstants.js index 53f271fa..c46bab55 100644 --- a/frontend/src/utils/routeConstants.js +++ b/frontend/src/utils/routeConstants.js @@ -2,3 +2,4 @@ export const HOME = "Home"; export const OCP = "OCP"; export const QUAY = "QUAY"; export const TELCO = "TELCO"; +export const ILAB = "ILAB"; diff --git a/publish-containers.sh b/publish-containers.sh new file mode 100755 index 00000000..ca97f8b5 --- /dev/null +++ b/publish-containers.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# +# Simple script to build and publish development containers for the backend +# and frontend components. +# +# Please don't edit this file for debugging: it's too easy to accidentally +# commit undesirable changes.
Instead, I've added some convenient environment +# variables to support common changes (defaults in parentheses): +# +# REGISTRY -- container registry, e.g. docker.io or quay.io (images.paas.redhat.com) +# ACCOUNT -- registry account, presumed to be logged in already ($USER) +# REPO -- account container repo (cpt) +# TAG -- container tag (latest) +# SKIP_FRONTEND -- SKIP_FRONTEND=1 to skip building and pushing frontend (0) +# SKIP_BACKEND -- SKIP_BACKEND=1 to skip building and pushing backend (0) +# +REGISTRY=${REGISTRY:-images.paas.redhat.com} +ACCOUNT=${ACCOUNT:-${USER}} +REPO=${REPO:-cpt} +TAG=${TAG:-latest} +SKIP_FRONTEND=${SKIP_FRONTEND:-0} +SKIP_BACKEND=${SKIP_BACKEND:-0} + +REPOSITORY=${REGISTRY}/${ACCOUNT}/${REPO} +# remove the existing "front" and "back" containers, if any (best effort) +podman rm -f front back +now=$(date +'%Y%m%d-%H%M%S') + +if [ "$SKIP_BACKEND" != 1 ] ;then + podman build -f backend/backend.containerfile --tag backend || exit 1 # never push a stale image after a failed build + podman push backend "${REPOSITORY}/backend:${TAG}" || exit 1 + podman push backend "${REPOSITORY}/backend:${now}" || exit 1 +fi + +if [ "$SKIP_FRONTEND" != 1 ] ;then + podman build -f frontend/frontend.containerfile --tag frontend || exit 1 # never push a stale image after a failed build + podman push frontend "${REPOSITORY}/frontend:${TAG}" || exit 1 + podman push frontend "${REPOSITORY}/frontend:${now}" || exit 1 +fi