From 6717f4d3477c8c2e685528bc0e879947986da912 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Wed, 16 Oct 2024 18:06:44 -0400 Subject: [PATCH 1/7] Add new Crucible backend service This encapsulates substantial logic to encapsulate interpretation of the Crucible Common Data Model OpenSearch schema for the use of CPT dashboard API components. By itself, it does nothing. --- backend/app/services/crucible_readme.md | 164 ++ backend/app/services/crucible_svc.py | 1988 +++++++++++++++++++++++ 2 files changed, 2152 insertions(+) create mode 100644 backend/app/services/crucible_readme.md create mode 100644 backend/app/services/crucible_svc.py diff --git a/backend/app/services/crucible_readme.md b/backend/app/services/crucible_readme.md new file mode 100644 index 00000000..0ac2e71f --- /dev/null +++ b/backend/app/services/crucible_readme.md @@ -0,0 +1,164 @@ +Crucible divides data across a set of OpenSearch (or ElasticSearch) indices, +each with a specific document mapping. CDM index names include a "root" name +(like "run") with a versioned prefix, like "cdmv7dev-run". + +Crucible timestamps are integers in "millisecond-from-the-epoch" format. + +The Crucible CDM hierarchy is roughly: + +- RUN (an instrumented benchmark run) + - TAG (metadata) + - ITERATION (a benchmark interval) + - PARAM (execution parameters) + - SAMPLE + - PERIOD (time range where data is recorded) + - METRIC_DESC (description of a specific recorded metric) + - METRIC_DATA (a specific recorded data point) + +OpenSearch doesn't support the concept of a SQL "join", but many of the indices +contain documents that could be considered a static "join" with parent documents +for convenience. For example, each `iteration` document contains a copy of it's +parent `run` document, while the `period` document contains copies of its parent +`sample`, `iteration`, and `run` documents. This means, for example, that it's +possible to make a single query returning all `period` documents for specific +iteration number of a specific run. + +
+
RUN
this contains the basic information about a performance run, including a + generated UUID, begin and end timestamps, a benchmark name, a user name and + email, the (host/directory) "source" of the indexed data (which is usable on + the controler's local file system), plus host and test harness names.
+
TAG
this contains information about a general purpose "tag" to associate some + arbitrary context with a run, for example software versions, hardware, or + other metadata. This can be considered a SQL JOIN with the run document, + adding a tag UUID, name, and value.
+
ITERATION
this contains basic information about a performance run iteration, + including the iteration UUID, number, the primary (benchmark) metric associated + with the iteration, plus the primary "period" of the iteration, and the + iteration status.
+
PARAM
this defines a key/value pair specifying behavior of the benchmark + script for an iteration. Parameters are iteration-specific, but parameters that + don't vary between iterations are often represented as run parameters.
+
SAMPLE
this contains basic information about a sample of an iteration, + including a sample UUID and sample number, along with a "path" for sample data + and a sample status.
+
PERIOD
this contains basic information about a period during which data is + collected within a sample, including the period UUID, name, and begin and end + timestamps. A set of periods can be "linked" through a "prev_id" field.
+
METRIC_DESC
this contains descriptive data about a specific series + of metric values within a specific period of a run, including the metric UUID, + the metric "class", type, and source, along with a set of "names" (key/value + pairs) defining the metric breakout details that narrow down a specific source and + type. For example source:mpstat, type:Busy-CPU data is broken down by package, cpu, + core, and other breakouts which can be isolated or aggregated for data reporting.
+
METRIC_DATA
this describes a specific data point, sampled over a specified + duration with a fixed begin and end timestamp, plus a floating point value. + Each is tied to a specific metric_desc UUID value. Depending on the varied + semantics of metric_desc breakouts, it's often valid to aggregate these + across a set of relatead metric_desc IDs, based on source and type, for + example to get aggregate CPU load across all modes, cores, or across all + modes within a core. This service allows arbitrary aggregation within a + given metric source and type, but by default will attempt to direct the + caller to specifying a set of breakouts that result in a single metric_desc + ID.
+
+ +The `crucible_svc` allows CPT project APIs to access a Crucible CDM backing +store to find information about runs, tags, params, iterations, samples, +periods, plus various ways to expose and aggregate metric data both for +primary benchmarks and non-periodic tools. + +The `get_runs` API is the primary entry point, returning an object that +supports filtering, sorting, and pagination of the Crucible run data decorated +with useful iteration, tag, and parameter data. + +The metrics data APIs (data, breakouts, summary, and graph) now allow +filtering by the metric "name" data. This allows "drilling down" through +the non-periodic "tool data". For example, IO data is per-disk, CPU +information is broken down by core and package. You can now aggregate +all global data (e.g., total system CPU), or filter by breakout names to +select by CPU, mode (usr, sys, irq), etc. + +For example, to return `Busy-CPU` ("type") graph data from the `mpstat` +("source") tool for system mode on one core, you might query: + +``` +/api/v1/ilab/runs//graph/mpstat::Busy-CPU?name=core=12,package=1,num=77,type=sys +``` + +If you make a `graph`, `data`, or `summary` query that doesn't translate +to a unique metric, and don't select aggregation, you'll get a diagnostic +message identifying possible additional filters. For example, with +`type=sys` removed, that same query will show the available values for +the `type` breakout name: + +``` +{ + "detail": [ + { + "message": "More than one metric (5) probably means you should add filters", + "names": { + "type": [ + "guest", + "irq", + "soft", + "sys", + "usr" + ] + }, + "periods": [] + } + ] +} +``` + +This capability can be used to build an interactive exploratory UI to +allow displaying breakout details. The `get_metrics` API will show all +recorded metrics, along with information the names and values used in +those. Metrics that show "names" with more than one value will need to be +filtered to produce meaningful summaries or graphs. + +You can instead aggregate metrics across breakouts using the `?aggregate` +query parameter, like `GET /api/v1/ilab/runs//graph/mpstat::Busy-CPU?aggregate` +which will aggregate all CPU busy data for the system. + +Normally you'll want to display data based on sample periods, for example the +primary period of an iteration, using `?period=`. This will +implicitly constrain the metric data based on the period ID associated with +the `metric_desc` document *and* the begin/end time period of the selected +periods. Normally, a benchmark will will separate iterations because each is +run with a different parameter value, and the default graph labeling will +look for a set of distinct parameters not used by other iterations: for +example, `mpstat::Busy-CPU (batch-size=16)`. + +The `get_breakouts` API can be used to explore the namespace recorded for that +metric in the specified run. For example, + +``` +GET /api/v1/ilab/runs//breakouts/sar-net::packets-sec?name=direction=rx +{ + "label": "sar-net::packets-sec", + "source": "sar-net", + "type": "packets-sec", + "class": [], + "names": { + "dev": [ + "lo", + "eno12409", + "eno12399" + ], + "type": [ + "physical", + "virtual" + ] + } +} +``` + +The `get_filters` API reports all the tag and param filter tags and +values for the runs. These can be used for the `filters` query parameter +on `get_runs` to restrict the set of runs reported; for example, +`/api/v1/ilab/runs?filter=param:workflow=sdg` shows only runs with the param +arg `workflow` set to the value `sdg`. You can search for a subset of the +string value using the operator "~" instead of "=". For example, +`?filter=param:user~user` will match `user` values of "A user" or "The user". diff --git a/backend/app/services/crucible_svc.py b/backend/app/services/crucible_svc.py new file mode 100644 index 00000000..7ae9ac12 --- /dev/null +++ b/backend/app/services/crucible_svc.py @@ -0,0 +1,1988 @@ +"""Service to pull data from a Crucible CDM OpenSearch data store + +A set of helper methods to enable a project API to easily process data from a +Crucible controller's OpenSearch data backend. + +This includes paginated, filtered, and sorted lists of benchmark runs, along +access to the associated Crucible documents such as iterations, samples, and +periods. Metric data can be accessed by breakout names, or aggregated by +breakout subsets or collection periods as either raw data points, statistical +aggregate, or Plotly graph format for UI display. +""" + +import time +from collections import defaultdict +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Iterator, Optional, Tuple, Union + +from elasticsearch import AsyncElasticsearch, NotFoundError +from fastapi import HTTPException, status +from pydantic import BaseModel + +from app import config + + +class Graph(BaseModel): + """Describe a single graph + + This represents a JSON object provided by a caller through the get_graph + API to describe a specific metric graph. + + The default title (if the field is omitted) is the metric label with a + suffix denoting breakout values selected, any unique parameter values + in a selected iteration, and (if multiple runs are selected in any Graph + list) an indication of the run index. For example, + "mpstat::Busy-CPU [core=2,type=usr] (batch-size=16)". + + Fields: + metric: the metric label, "ilab::train-samples-sec" + aggregate: True to aggregate unspecified breakouts + color: CSS color string ("green" or "#008000") + names: Lock in breakouts + periods: Select metrics for specific test period(s) + run: Override the default run ID from GraphList + title: Provide a title for the graph. The default is a generated title + """ + + metric: str + aggregate: bool = False + color: Optional[str] = None + names: Optional[list[str]] = None + periods: Optional[list[str]] = None + run: Optional[str] = None + title: Optional[str] = None + + +class GraphList(BaseModel): + """Describe a set of overlaid graphs + + This represents a JSON object provided by a caller through the get_graph + API to introduce a set of constrained metrics to be graphed. The "run + ID" here provides a default for the embedded Graph objects, and can be + omitted if all Graph objects specify a run ID. (This is most useful to + select a set of graphs all for a single run ID.) + + Fields: + run: Specify the (default) run ID + name: Specify a name for the set of graphs + graphs: a list of Graph objects + """ + + run: Optional[str] = None + name: str + graphs: list[Graph] + + +@dataclass +class Point: + """Graph point + + Record the start & end timestamp and value of a metric data point + """ + + begin: int + end: int + value: float + + +colors = [ + "black", + "aqua", + "blue", + "fuschia", + "gray", + "green", + "maroon", + "navy", + "olive", + "teal", + "silver", + "lightskyblue", + "mediumspringgreen", + "mistyrose", + "darkgoldenrod", + "cadetblue", + "chocolate", + "coral", + "brown", + "bisque", + "deeppink", + "sienna", +] + + +@dataclass +class Term: + namespace: str + key: str + value: str + + +class Parser: + """Help parsing filter expressions.""" + + def __init__(self, term: str): + """Construct an instance to help parse query parameter expressions + + These consist of a sequence of tokens separated by delimiters. Each + token may be quoted to allow matching against strings with spaces. + + For example, `param:name="A string"` + + Args: + term: A filter expression to parse + """ + self.buffer = term + self.context = term + self.offset = 0 + + def _next_token( + self, delimiters: list[str] = [], optional: bool = False + ) -> Tuple[str, Union[str, None]]: + """Extract the next token from an expression + + Tokens may be quoted; the quotes are removed. for example, the two + expressions `'param':"workflow"='"sdg"'` and `param:workflow:sdg` are + identical. + + Args: + delimiters: a list of delimiter characters + optional: whether the terminating delimiter is optional + + Returns: + A tuple consisting of the token and the delimiter (or None if + parsing reached the end of the expression and the delimiter was + optional) + """ + + @dataclass + class Quote: + open: int + quote: str + + quoted: list[Quote] = [] + next_char = None + token = "" + first_quote = None + for o in range(len(self.buffer)): + next_char = self.buffer[o] + if next_char in delimiters and not quoted: + self.buffer = self.buffer[o + 1 :] + self.offset += o + 1 + break + elif next_char in ('"', "'"): + if o == 0: + first_quote = next_char + if quoted and quoted[-1].quote == next_char: + quoted.pop() + else: + quoted.append(Quote(o, next_char)) + token += next_char + else: + next_char = None + if quoted: + q = quoted[-1] + c = self.context + i = q.open + self.offset + annotated = c[:i] + "[" + c[i] + "]" + c[i + 1 :] + raise HTTPException( + status.HTTP_400_BAD_REQUEST, f"Unterminated quote at {annotated!r}" + ) + + # If delimiters are specified, and not optional, then we didn't + # find one, and that's an error. + if not optional and delimiters: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Missing delimiter from {','.join(delimiters)} after {token!r}", + ) + self.buffer = "" + self.offset = len(self.context) + return (token, next_char) if not first_quote else (token[1:-1], next_char) + + +class CommonParams: + """Help with sorting out parameters + + Parameter values are associated with iterations, but often a set of + parameters is common across all iterations of a run, and that set can + provide useful context. + + This helps to filter out identical parameters across a set of + iterations. + """ + + def __init__(self): + self.common: dict[str, Any] = {} + self.omit = set() + + def add(self, params: dict[str, Any]): + """Add a new iteration into the param set + + Mark all parameter keys which don't appear in all iterations, or which + have different values in at least one iteration, to be omitted from the + merged "common" param set. + + Args: + params: the param dictionary of an iteration + """ + if not self.common: + self.common.update(params) + else: + for k, v in self.common.items(): + if k not in self.omit and (k not in params or v != params[k]): + self.omit.add(k) + + def render(self) -> dict[str, Any]: + """Return a new param set with only common params""" + return {k: v for k, v in self.common.items() if k not in self.omit} + + +class CrucibleService: + """Support convenient generalized access to Crucible data + + This implements access to the "v7" Crucible "Common Data Model" through + OpenSearch queries. + """ + + # OpenSearch massive limit on hits in a single query + BIGQUERY = 262144 + + # Define the 'run' document fields that support general filtering via + # `?filter=:` + # + # TODO: this excludes 'desc', which isn't used by the ilab runs, and needs + # different treatment as it's a text field rather than a term. It's not an + # immediate priority for ilab, but may be important for general use. + RUN_FILTERS = ("benchmark", "email", "name", "source", "harness", "host") + + # Define the keywords for sorting. + DIRECTIONS = ("asc", "desc") + FIELDS = ( + "begin", + "benchmark", + "desc", + "email", + "end", + "harness", + "host", + "id", + "name", + "source", + ) + + def __init__(self, configpath: str = "crucible"): + """Initialize a Crucible CDM (OpenSearch) connection. + + Generally the `configpath` should be scoped, like `ilab.crucible` so + that multiple APIs based on access to distinct Crucible controllers can + coexist. + + Initialization includes making an "info" call to confirm and record the + server response. + + Args: + configpath: The Vyper config path (e.g., "ilab.crucible") + """ + self.cfg = config.get_config() + self.user = self.cfg.get(configpath + ".username") + self.password = self.cfg.get(configpath + ".password") + self.auth = (self.user, self.password) if self.user or self.password else None + self.url = self.cfg.get(configpath + ".url") + self.elastic = AsyncElasticsearch(self.url, basic_auth=self.auth) + + @staticmethod + def _get_index(root: str) -> str: + return "cdmv7dev-" + root + + @staticmethod + def _split_list(alist: Optional[list[str]] = None) -> list[str]: + """Split a list of parameters + + For simplicity, the APIs supporting "list" query parameters allow + each element in the list to be a comma-separated list of strings. + For example, ["a", "b", "c"] is logically the same as ["a,b,c"]. + + This method normalizes the second form into first to simplify life for + consumers. + + Args: + alist: list of names or name lists + + Returns: + A simple list of options + """ + l: list[str] = [] + if alist: + for n in alist: + l.extend(n.split(",")) + return l + + @staticmethod + def _normalize_date(value: Optional[Union[int, str, datetime]]) -> int: + """Normalize date parameters + + The Crucible data model stores dates as string representations of an + integer "millseconds-from-epoch" value. To allow flexibility, this + Crucible service allows incoming dates to be specified as ISO-format + strings, as integers, or as the stringified integer. + + That is, "2024-09-12 18:29:35.123000+00:00", "1726165775123", and + 1726165775123 are identical. + + Args: + value: Representation of a date-time value + + Returns: + The integer milliseconds-from-epoch equivalent + """ + try: + if isinstance(value, int): + return value + elif isinstance(value, datetime): + return int(value.timestamp() * 1000.0) + elif isinstance(value, str): + try: + return int(value) + except ValueError: + pass + try: + d = datetime.fromisoformat(value) + return int(d.timestamp() * 1000.0) + except ValueError: + pass + except Exception as e: + print(f"normalizing {type(value).__name__} {value} failed with {str(e)}") + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Date representation {value} is not a date string or timestamp", + ) + + @staticmethod + def _hits( + payload: dict[str, Any], fields: Optional[list[str]] = None + ) -> Iterator[dict[str, Any]]: + """Helper to iterate through OpenSearch query matches + + Iteratively yields the "_source" of each hit. As a convenience, can + yield a sub-object of "_source" ... for example, specifying the + optional "fields" as ["metric_desc", "id"] will yield the equivalent of + hit["_source"]["metric_desc"]["id"] + + Args: + payload: OpenSearch reponse payload + fields: Optional sub-fields of "_source" + + Returns: + Yields each object from the "greatest hits" list + """ + if "hits" not in payload: + raise HTTPException( + status_code=500, detail=f"Attempt to iterate hits for {payload}" + ) + hits = payload.get("hits", {}).get("hits", []) + for h in hits: + source = h["_source"] + if fields: + for f in fields: + source = source[f] + yield source + + @staticmethod + def _aggs(payload: dict[str, Any], aggregation: str) -> Iterator[dict[str, Any]]: + """Helper to access OpenSearch aggregations + + Iteratively yields the name and value of each aggregation returned + by an OpenSearch query. This can also be used for nested aggregations + by specifying an aggregation object. + + Args: + payload: A JSON dict containing an "aggregations" field + + Returns: + Yields each aggregation from an aggregation bucket list + """ + if "aggregations" not in payload: + raise HTTPException( + status_code=500, + detail=f"Attempt to iterate missing aggregations for {payload}", + ) + aggs = payload["aggregations"] + if aggregation not in aggs: + raise HTTPException( + status_code=500, + detail=f"Attempt to iterate missing aggregation {aggregation} for {payload}", + ) + for agg in aggs[aggregation]["buckets"]: + yield agg + + @staticmethod + def _format_timestamp(timestamp: Union[str, int]) -> str: + """Convert stringified integer milliseconds-from-epoch to ISO date""" + try: + ts = int(timestamp) + except Exception as e: + print(f"ERROR: invalid {timestamp!r}: {str(e)!r}") + ts = 0 + return str(datetime.fromtimestamp(ts / 1000.00, timezone.utc)) + + @classmethod + def _format_data(cls, data: dict[str, Any]) -> dict[str, Any]: + """Helper to format a "metric_data" object + + Crucible stores the date, duration, and value as strings, so this + converts them to more useful values. The end timestamp is converted + to an ISO date-time string; the duration and value to floating point + numbers. + + Args: + data: a "metric_data" object + + Returns: + A neatly formatted "metric_data" object + """ + return { + "begin": cls._format_timestamp(data["begin"]), + "end": cls._format_timestamp(data["end"]), + "duration": int(data["duration"]) / 1000, + "value": float(data["value"]), + } + + @classmethod + def _format_period(cls, period: dict[str, Any]) -> dict[str, Any]: + """Helper to format a "period" object + + Crucible stores the date values as stringified integers, so this + converts the begin and end timestamps to ISO date-time strings. + + Args: + period: a "period" object + + Returns: + A neatly formatted "period" object + """ + return { + "begin": cls._format_timestamp(timestamp=period["begin"]), + "end": cls._format_timestamp(period["end"]), + "id": period["id"], + "name": period["name"], + } + + @classmethod + def _build_filter_options(cls, filter: Optional[list[str]] = None) -> Tuple[ + Optional[list[dict[str, Any]]], + Optional[list[dict[str, Any]]], + Optional[list[dict[str, Any]]], + ]: + """Build filter terms for tag and parameter filter terms + + Each term has the form ":". Any term + may be quoted: quotes are stripped and ignored. (This is generally only + useful on the to include spaces.) + + We support three namespaces: + param: Match against param index arg/val + tag: Match against tag index name/val + run: Match against run index fields + + We support two operators: + =: Exact match + ~: Partial match + + Args: + filter: list of filter terms like "param:key=value" + + Returns: + A set of OpenSearch filter object lists to detect missing + and matching documents for params, tags, and run fields. For + example, to select param:batch-size=12 results in the + following param filter list: + + [ + {' + dis_max': { + 'queries': [ + { + 'bool': { + 'must': [ + {'term': {'param.arg': 'batch-size'}}, + {'term': {'param.val': '12'}} + ] + } + } + ] + } + } + ] + """ + terms = defaultdict(list) + for term in cls._split_list(filter): + p = Parser(term) + namespace, _ = p._next_token([":"]) + key, operation = p._next_token(["=", "~"]) + value, _ = p._next_token() + if operation == "~": + value = f".*{value}.*" + matcher = "regexp" + else: + matcher = "term" + if namespace in ("param", "tag"): + if namespace == "param": + key_field = "param.arg" + value_field = "param.val" + else: + key_field = "tag.name" + value_field = "tag.val" + terms[namespace].append( + { + "bool": { + "must": [ + {"term": {key_field: key}}, + {matcher: {value_field: value}}, + ] + } + } + ) + elif namespace == "run": + terms[namespace].append({matcher: {f"run.{key}": value}}) + else: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"unknown filter namespace {namespace!r}", + ) + param_filter = None + tag_filter = None + if "param" in terms: + param_filter = [{"dis_max": {"queries": terms["param"]}}] + if "tag" in terms: + tag_filter = [{"dis_max": {"queries": terms["tag"]}}] + return param_filter, tag_filter, terms.get("run") + + @classmethod + def _build_name_filters( + cls, namelist: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Build filter terms for metric breakout names + + for example, "cpu=10" filters for metric data descriptors where the + breakout name "cpu" exists and has a value of 10. + + Args: + namelist: list of possibly comma-separated list values + + Returns: + A list of filters to match breakout terms + """ + names: list[str] = cls._split_list(namelist) + filters = [] + for e in names: + try: + n, v = e.split("=", maxsplit=1) + except ValueError: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, f"Filter item {e} must be '='" + ) + filters.append({"term": {f"metric_desc.names.{n}": v}}) + return filters + + @classmethod + def _build_period_filters( + cls, periodlist: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Build period filters + + Generate metric_desc filter terms to match against a list of period IDs. + + Note that not all metric descriptions are periodic, and we don't want + these filters to exclude them -- so the filter will exclude only + documents that have a period and don't match. (That is, we won't drop + any non-periodic metrics. We expect those to be filtered by timestamp + instead.) + + Args: + period: list of possibly comma-separated period IDs + + Returns: + A filter term that requires a period.id match only for metric_desc + documents with a period. + """ + pl: list[str] = cls._split_list(periodlist) + if pl: + return [ + { + "dis_max": { + "queries": [ + {"bool": {"must_not": {"exists": {"field": "period"}}}}, + {"terms": {"period.id": pl}}, + ] + } + } + ] + else: + return [] + + @classmethod + def _build_metric_filters( + cls, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> list[dict[str, Any]]: + """Helper for filtering metric descriptions + + We normally filter by run, metric "label", and optionally by breakout + names and periods. This encapsulates the filter construction. + + Args: + run: run ID + metric: metric label (ilab::sdg-samples-sec) + names: list of "name=value" filters + periods: list of period IDs + + Returns: + A list of OpenSearch filter expressions + """ + msource, mtype = metric.split("::") + return ( + [ + {"term": {"run.id": run}}, + {"term": {"metric_desc.source": msource}}, + {"term": {"metric_desc.type": mtype}}, + ] + + cls._build_name_filters(names) + + cls._build_period_filters(periods) + ) + + @classmethod + def _build_sort_terms(cls, sorters: Optional[list[str]]) -> list[dict[str, str]]: + """Build sort term list + + Sorters may reference any native `run` index field and must specify + either "asc"(ending) or "desc"(ending) sort order. Any number of + sorters may be combined, like ["name:asc,benchmark:desc", "end:desc"] + + Args: + sorters: list of : sort terms + + Returns: + list of OpenSearch sort terms + """ + if sorters: + sort_terms = [] + for s in sorters: + key, dir = s.split(":", maxsplit=1) + if dir not in cls.DIRECTIONS: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Sort direction {dir!r} must be one of {','.join(DIRECTIONS)}", + ) + if key not in cls.FIELDS: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Sort key {key!r} must be one of {','.join(FIELDS)}", + ) + sort_terms.append({f"run.{key}": dir}) + else: + sort_terms = [{"run.begin": "asc"}] + return sort_terms + + async def _search( + self, index: str, query: Optional[dict[str, Any]] = None, **kwargs + ) -> dict[str, Any]: + """Issue an OpenSearch query + + Args: + index: The "base" CDM index name, e.g., "run", "metric_desc" + query: An OpenSearch query object + kwargs: Additional OpenSearch parameters + + Returns: + The OpenSearch response payload (JSON dict) + """ + idx = self._get_index(index) + start = time.time() + value = await self.elastic.search(index=idx, body=query, **kwargs) + print( + f"QUERY on {idx} took {time.time() - start} seconds, " + f"hits: {value.get('hits', {}).get('total')}" + ) + return value + + async def close(self): + """Close the OpenSearch connection""" + if self.elastic: + await self.elastic.close() + self.elastic = None + + async def search( + self, + index: str, + filters: Optional[list[dict[str, Any]]] = None, + aggregations: Optional[dict[str, Any]] = None, + sort: Optional[list[dict[str, str]]] = None, + source: Optional[str] = None, + size: Optional[int] = None, + offset: Optional[int] = None, + **kwargs, + ) -> dict[str, Any]: + """OpenSearch query helper + + Combine index, filters, aggregations, sort, and pagination options + into an OpenSearch query. + + Args: + index: "root" CDM index name ("run", "metric_desc", ...) + filters: list of JSON dict filter terms {"term": {"name": "value}} + aggregations: list of JSON dict aggregations {"name": {"term": "name"}} + sort: list of JSON dict sort terms ("name": "asc") + size: The number of hits to return; defaults to "very large" + offset: The number of hits to skip, for pagination + kwargs: Additional OpenSearch options + + Returns: + The OpenSearch response + """ + f = filters if filters else [] + query = { + "size": self.BIGQUERY if size is None else size, + "query": {"bool": {"filter": f}}, + } + if sort: + query.update({"sort": sort}) + if source: + query.update({"_source": source}) + if offset: + query.update({"from": offset}) + if aggregations: + query.update({"aggs": aggregations}) + return await self._search(index, query, **kwargs) + + async def _get_metric_ids( + self, + run: str, + metric: str, + namelist: Optional[list[str]] = None, + periodlist: Optional[list[str]] = None, + aggregate: bool = False, + ) -> list[str]: + """Generate a list of matching metric_desc IDs + + Given a specific run and metric name, and a set of breakout filters, + returns a list of metric desc IDs that match. + + If a single ID is required to produce a consistent metric, and the + supplied filters produce more than one without aggregation, raise a + 422 HTTP error (UNPROCESSABLE CONTENT) with a response body showing + the unsatisfied breakouts (name and available values). + + Args: + run: run ID + metric: combined metric name (e.g., sar-net::packets-sec) + namelist: a list of breakout filters like "type=physical" + periodlist: a list of period IDs + aggregate: if True, allow multiple metric IDs + + Returns: + A list of matching metric_desc ID value(s) + """ + filters = self._build_metric_filters(run, metric, namelist, periodlist) + metrics = await self.search( + "metric_desc", + filters=filters, + ignore_unavailable=True, + ) + if len(metrics["hits"]["hits"]) < 1: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + ( + f"No matches for {metric}" + f"{('+' + ','.join(namelist) if namelist else '')}" + ), + ) + ids = [h["metric_desc"]["id"] for h in self._hits(metrics)] + if len(ids) < 2 or aggregate: + return ids + + # If we get here, the client asked for breakout data that doesn't + # resolve to a single metric stream, and didn't specify aggregation. + # Offer some help. + names = defaultdict(set) + periods = set() + response = { + "message": f"More than one metric ({len(ids)}) means " + "you should add breakout filters or aggregate." + } + for m in self._hits(metrics): + if "period" in m: + periods.add(m["period"]["id"]) + for n, v in m["metric_desc"]["names"].items(): + names[n].add(v) + + # We want to help filter a consistent summary, so only show those + # breakout names with more than one value. + response["names"] = {n: sorted(v) for n, v in names.items() if v and len(v) > 1} + response["periods"] = list(periods) + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=response + ) + + async def _build_timestamp_range_filters( + self, periods: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Create a timestamp range filter + + This extracts the begin and end timestamps from the list of periods and + builds a timestamp filter range to select documents on or after the + earliest begin timestamp and on or before the latest end timestamp. + + Args: + periods: a list of CDM period IDs + + Returns: + Constructs a range filter for the earliest begin timestamp and the + latest end timestamp among the specified periods. + """ + if periods: + ps = self._split_list(periods) + matches = await self.search( + "period", filters=[{"terms": {"period.id": ps}}] + ) + start = None + end = None + for h in self._hits(matches): + p = h["period"] + st = p["begin"] + et = p["end"] + + # If any period is missing a timestamp, use the run's timestamp + # instead to avoid blowing up on a CDM error. + if st is None: + st = h["run"]["begin"] + if et is None: + et = h["run"]["end"] + if st and (not start or st < start): + start = st + if et and (not end or et > end): + end = et + if start is None or end is None: + name = ( + f"{h['run']['benchmark']}:{h['run']['begin']}-" + f"{h['iteration']['num']}-{h['sample']['num']}-" + f"{p['name']}" + ) + raise HTTPException( + status.HTTP_422_UNPROCESSABLE_ENTITY, + f"Unable to compute {name!r} time range {start!r} -> {end!r}", + ) + return [ + {"range": {"metric_data.begin": {"gte": str(start)}}}, + {"range": {"metric_data.end": {"lte": str(end)}}}, + ] + else: + return [] + + async def _get_run_ids( + self, index: str, filters: Optional[list[dict[str, Any]]] = None + ) -> set[str]: + """Return a set of run IDs matching a filter + + Documents in the specified index must have "run.id" fields. Returns + a set of unique run IDs matched by the filter in the specified index. + + Args: + index: root CDM index name + filters: a list of OpenSearch filter terms + + Returns: + a set of unique run ID values + """ + filtered = await self.search( + index, source="run.id", filters=filters, ignore_unavailable=True + ) + print(f"HITS: {filtered['hits']['hits']}") + return set([x for x in self._hits(filtered, ["run", "id"])]) + + async def get_run_filters(self) -> dict[str, dict[str, list[str]]]: + """Return possible tag and filter terms + + Return a description of tag and param filter terms meaningful + across all datasets. TODO: we should support date-range and benchmark + filtering. Consider supporting all `run` API filtering, which would + allow adjusting the filter popups to drop options no longer relevant + to a given set. + + { + "param": { + {"gpus": [4", "8"]} + } + } + + Returns: + A two-level JSON dict; the first level is the namespace (param or + tag), the second level key is the param/tag/field name and its value + is the set of values defined for that key. + """ + tags = await self.search( + "tag", + size=0, + aggregations={ + "key": { + "terms": {"field": "tag.name", "size": self.BIGQUERY}, + "aggs": { + "values": {"terms": {"field": "tag.val", "size": self.BIGQUERY}} + }, + } + }, + ignore_unavailable=True, + ) + params = await self.search( + "param", + size=0, + aggregations={ + "key": { + "terms": {"field": "param.arg", "size": self.BIGQUERY}, + "aggs": { + "values": { + "terms": {"field": "param.val", "size": self.BIGQUERY} + } + }, + } + }, + ignore_unavailable=True, + ) + aggs = { + k: {"terms": {"field": f"run.{k}", "size": self.BIGQUERY}} + for k in self.RUN_FILTERS + } + runs = await self.search( + "run", + size=0, + aggregations=aggs, + ) + result = defaultdict(lambda: defaultdict(lambda: set())) + for p in self._aggs(params, "key"): + for v in p["values"]["buckets"]: + result["param"][p["key"]].add(v["key"]) + for t in self._aggs(tags, "key"): + for v in t["values"]["buckets"]: + result["tag"][t["key"]].add(v["key"]) + for name in self.RUN_FILTERS: + for f in self._aggs(runs, name): + result["run"][name].add(f["key"]) + return {s: {k: list(v) for k, v in keys.items()} for s, keys in result.items()} + + async def get_runs( + self, + filter: Optional[list[str]] = None, + start: Optional[Union[int, str, datetime]] = None, + end: Optional[Union[int, str, datetime]] = None, + offset: int = 0, + sort: Optional[list[str]] = None, + size: Optional[int] = None, + **kwargs, + ) -> dict[str, Any]: + """Return matching Crucible runs + + Filtered and sorted list of runs. + + { + "sort": [], + "startDate": "2024-01-01T05:00:00+00:00", + "size": 1, + "offset": 0, + "results": [ + { + "begin": "1722878906342", + "benchmark": "ilab", + "email": "A@email", + "end": "1722880503544", + "id": "4e1d2c3c-b01c-4007-a92d-23a561af2c11", + "name": "\"A User\"", + "source": "node.example.com//var/lib/crucible/run/ilab--2024-08-05_17:17:13_UTC--4e1d2c3c-b01c-4007-a92d-23a561af2c11", + "tags": { + "topology": "none" + }, + "iterations": [ + { + "iteration": 1, + "primary_metric": "ilab::train-samples-sec", + "primary_period": "measurement", + "status": "pass", + "params": { + "cpu-offload-pin-memory": "1", + "model": "/home/models/granite-7b-lab/", + "data-path": "/home/data/training/knowledge_data.jsonl", + "cpu-offload-optimizer": "1", + "nnodes": "1", + "nproc-per-node": "4", + "num-runavg-samples": "2" + } + } + ], + "primary_metrics": [ + "ilab::train-samples-sec" + ], + "status": "pass", + "params": { + "cpu-offload-pin-memory": "1", + "model": "/home/models/granite-7b-lab/", + "data-path": "/home/data/training/knowledge_data.jsonl", + "cpu-offload-optimizer": "1", + "nnodes": "1", + "nproc-per-node": "4", + "num-runavg-samples": "2" + }, + "begin_date": "2024-08-05 17:28:26.342000+00:00", + "end_date": "2024-08-05 17:55:03.544000+00:00" + } + ], + "count": 1, + "total": 15, + "next_offset": 1 + } + + Args: + start: Include runs starting at timestamp + end: Include runs ending no later than timestamp + filter: List of tag/param filter terms (parm:key=value) + sort: List of sort terms (column:) + size: Include up to runs in output + offset: Use size/from pagination instead of search_after + + Returns: + JSON object with "results" list and "housekeeping" fields + """ + + # We need to remove runs which don't match against 'tag' or 'param' + # filter terms. The CDM schema doesn't make it possible to do this in + # one shot. Instead, we run queries against the param and tag indices + # separately, producing a list of run IDs which we'll exclude from the + # final collection. + # + # If there are no matches, we can exit early. (TODO: should this be an + # error, or just a success with an empty list?) + results = {} + filters = [] + sorters = self._split_list(sort) + results["sort"] = sorters + sort_terms = self._build_sort_terms(sorters) + param_filters, tag_filters, run_filters = self._build_filter_options(filter) + if run_filters: + filters.extend(run_filters) + if start or end: + s = None + e = None + if start: + s = self._normalize_date(start) + results["startDate"] = datetime.fromtimestamp( + s / 1000.0, tz=timezone.utc + ) + if end: + e = self._normalize_date(end) + results["endDate"] = datetime.fromtimestamp(e / 1000.0, tz=timezone.utc) + + if s and e and s > e: + raise HTTPException( + status_code=422, + detail={ + "error": "Invalid date format, start_date must be less than end_date" + }, + ) + cond = {} + if s: + cond["gte"] = str(s) + if e: + cond["lte"] = str(e) + filters.append({"range": {"run.begin": cond}}) + if size: + results["size"] = size + results["offset"] = offset if offset is not None else 0 + + # In order to filter by param or tag values, we need to produce a list + # of matching RUN IDs from each index. We'll then drop any RUN ID that's + # not on both lists. + if tag_filters: + tagids = await self._get_run_ids("tag", tag_filters) + if param_filters: + paramids = await self._get_run_ids("param", param_filters) + + # If it's obvious we can't produce any matches at this point, exit. + if (tag_filters and len(tagids) == 0) or (param_filters and len(paramids) == 0): + results.update({"results": [], "count": 0, "total": 0}) + return results + + hits = await self.search( + "run", + size=size, + offset=offset, + sort=sort_terms, + filters=filters, + **kwargs, + ignore_unavailable=True, + ) + rawiterations = await self.search("iteration", ignore_unavailable=True) + rawtags = await self.search("tag", ignore_unavailable=True) + rawparams = await self.search("param", ignore_unavailable=True) + + iterations = defaultdict(list) + tags = defaultdict(defaultdict) + params = defaultdict(defaultdict) + run_params = defaultdict(list) + + for i in self._hits(rawiterations): + iterations[i["run"]["id"]].append(i["iteration"]) + + # Organize tags by run ID + for t in self._hits(rawtags): + tags[t["run"]["id"]][t["tag"]["name"]] = t["tag"]["val"] + + # Organize params by iteration ID + for p in self._hits(rawparams): + run_params[p["run"]["id"]].append(p) + params[p["iteration"]["id"]][p["param"]["arg"]] = p["param"]["val"] + + runs = {} + for h in self._hits(hits): + run = h["run"] + rid = run["id"] + + # Filter the runs by our tag and param queries + if param_filters and rid not in paramids: + continue + + if tag_filters and rid not in tagids: + continue + + # Collect unique runs: the status is "fail" if any iteration for + # that run ID failed. + runs[rid] = run + run["tags"] = tags.get(rid, {}) + run["iterations"] = [] + run["primary_metrics"] = set() + common = CommonParams() + for i in iterations.get(rid, []): + iparams = params.get(i["id"], {}) + if "status" not in run: + run["status"] = i["status"] + else: + if i["status"] != "pass": + run["status"] = i["status"] + common.add(iparams) + run["primary_metrics"].add(i["primary-metric"]) + run["iterations"].append( + { + "iteration": i["num"], + "primary_metric": i["primary-metric"], + "primary_period": i["primary-period"], + "status": i["status"], + "params": iparams, + } + ) + run["params"] = common.render() + try: + run["begin_date"] = self._format_timestamp(run["begin"]) + run["end_date"] = self._format_timestamp(run["end"]) + except KeyError as e: + print(f"Missing 'run' key {str(e)} in {run}") + run["begin_date"] = self._format_timestamp("0") + run["end_date"] = self._format_timestamp("0") + + count = len(runs) + total = hits["hits"]["total"]["value"] + results.update( + { + "results": list(runs.values()), + "count": count, + "total": total, + } + ) + if size and (offset + count < total): + results["next_offset"] = offset + size + return results + + async def get_tags(self, run: str, **kwargs) -> dict[str, str]: + """Return the set of tags associated with a run + + Args: + run: run ID + + Returns: + JSON dict with "tag" keys showing each value + """ + tags = await self.search( + index="tag", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + return {t["name"]: t["val"] for t in self._hits(tags, ["tag"])} + + async def get_params( + self, run: Optional[str] = None, iteration: Optional[str] = None, **kwargs + ) -> dict[str, dict[str, str]]: + """Return the set of parameters for a run or iteration + + Parameters are technically associated with an iteration, but can be + aggregated for a run. This will return a set of parameters for each + iteration; plus, if a "run" was specified, a filtered list of param + values that are common across all iterations. + + Args: + run: run ID + iteration: iteration ID + kwargs: additional OpenSearch keywords + + Returns: + JSON dict of param values by iteration (plus "common" if by run ID) + """ + if not run and not iteration: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A params query requires either a run or iteration ID", + ) + match = {"run.id" if run else "iteration.id": run if run else iteration} + params = await self.search( + index="param", + filters=[{"term": match}], + **kwargs, + ignore_unavailable=True, + ) + response = defaultdict(defaultdict) + for param in self._hits(params): + iter = param["iteration"]["id"] + arg = param["param"]["arg"] + val = param["param"]["val"] + if response.get(iter) and response.get(iter).get(arg): + print(f"Duplicate param {arg} for iteration {iter}") + response[iter][arg] = val + + # Filter out all parameter values that don't exist in all or which have + # different values. + if run: + common = CommonParams() + for params in response.values(): + common.add(params) + response["common"] = common.render() + return response + + async def get_iterations(self, run: str, **kwargs) -> list[dict[str, Any]]: + """Return a list of iterations for a run + + Args: + run: run ID + kwargs: additional OpenSearch keywords + + Returns: + A list of iteration documents + """ + iterations = await self.search( + index="iteration", + filters=[{"term": {"run.id": run}}], + sort=[{"iteration.num": "asc"}], + **kwargs, + ignore_unavailable=True, + ) + return [i["iteration"] for i in self._hits(iterations)] + + async def get_samples( + self, run: Optional[str] = None, iteration: Optional[str] = None, **kwargs + ): + """Return a list of samples for a run or iteration + + Args: + run: run ID + iteration: iteration ID + kwargs: additional OpenSearch keywords + + Returns: + A list of sample documents. + """ + if not run and not iteration: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A sample query requires either a run or iteration ID", + ) + match = {"run.id" if run else "iteration.id": run if run else iteration} + hits = await self.search( + index="sample", + filters=[{"term": match}], + **kwargs, + ignore_unavailable=True, + ) + samples = [] + for s in self._hits(hits): + print(f"SAMPLE's ITERATION {s['iteration']}") + sample = s["sample"] + sample["iteration"] = s["iteration"]["num"] + sample["primary_metric"] = s["iteration"]["primary-metric"] + sample["status"] = s["iteration"]["status"] + samples.append(sample) + return samples + + async def get_periods( + self, + run: Optional[str] = None, + iteration: Optional[str] = None, + sample: Optional[str] = None, + **kwargs, + ): + """Return a list of periods associated with a run, an iteration, or a + sample + + The "period" document is normalized to represent timestamps using ISO + strings. + + Args: + run: run ID + iteration: iteration ID + sample: sample ID + kwargs: additional OpenSearch parameters + + Returns: + a list of normalized period documents + """ + if not any((run, iteration, sample)): + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A period query requires a run, iteration, or sample ID", + ) + match = None + if sample: + match = {"sample.id": sample} + elif iteration: + match = {"iteration.id": iteration} + else: + match = {"run.id": run} + periods = await self.search( + index="period", + filters=[{"term": match}], + sort=[{"period.begin": "asc"}], + **kwargs, + ignore_unavailable=True, + ) + body = [] + for h in self._hits(periods): + period = self._format_period(period=h["period"]) + period["iteration"] = h["iteration"]["num"] + period["sample"] = h["sample"]["num"] + period["primary_metric"] = h["iteration"]["primary-metric"] + period["status"] = h["iteration"]["status"] + body.append(period) + return body + + async def get_timeline(self, run: str, **kwargs) -> dict[str, Any]: + """Report the relative timeline of a run + + With nested object lists, show runs to iterations to samples to + periods. + + Args: + run: run ID + kwargs: additional OpenSearch parameters + """ + itr = await self.search( + index="iteration", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + sam = await self.search( + index="sample", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + per = await self.search( + index="period", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + samples = defaultdict(list) + periods = defaultdict(list) + + for s in self._hits(sam): + samples[s["iteration"]["id"]].append(s) + for p in self._hits(per): + periods[p["sample"]["id"]].append(p) + + iterations = [] + robj = {"id": run, "iterations": iterations} + body = {"run": robj} + for i in self._hits(itr): + if "begin" not in robj: + robj["begin"] = self._format_timestamp(i["run"]["begin"]) + robj["end"] = self._format_timestamp(i["run"]["end"]) + iteration = i["iteration"] + iterations.append(iteration) + iteration["samples"] = [] + for s in samples.get(iteration["id"], []): + sample = s["sample"] + sample["periods"] = [] + for pr in periods.get(sample["id"], []): + period = self._format_period(pr["period"]) + sample["periods"].append(period) + iteration["samples"].append(sample) + return body + + async def get_metrics_list(self, run: str, **kwargs) -> dict[str, Any]: + """Return a list of metrics available for a run + + Each run may have multiple performance metrics stored. This API allows + retrieving a sorted list of the metrics available for a given run, with + the "names" selection criteria available for each and, for "periodic" + (benchmark) metrics, the defined periods for which data was gathered. + + { + "ilab::train-samples-sec": { + "periods": [{"id": , "name": "measurement"}], + "breakouts": {"benchmark-group" ["unknown"], ...} + }, + "iostat::avg-queue-length": { + "periods": [], + "breakouts": {"benchmark-group": ["unknown"], ...}, + }, + ... + } + + Args: + run: run ID + + Returns: + List of metrics available for the run + """ + hits = await self.search( + index="metric_desc", + filters=[{"term": {"run.id": run}}], + ignore_unavailable=True, + **kwargs, + ) + met = {} + for h in self._hits(hits): + desc = h["metric_desc"] + name = desc["source"] + "::" + desc["type"] + if name in met: + record = met[name] + else: + record = {"periods": [], "breakouts": defaultdict(set)} + met[name] = record + if "period" in h: + record["periods"].append(h["period"]["id"]) + for n, v in desc["names"].items(): + record["breakouts"][n].add(v) + return met + + async def get_metric_breakouts( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> dict[str, Any]: + """Help explore available metric breakouts + + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + + Returns: + A description of all breakout names and values, which can be + specified to narrow down metrics returns by the data, summary, and + graph APIs. + + { + "label": "mpstat::Busy-CPU", + "class": [ + "throughput" + ], + "type": "Busy-CPU", + "source": "mpstat", + "breakouts": { + "num": [ + "8", + "72" + ], + "thread": [ + 0, + 1 + ] + } + } + """ + start = time.time() + filters = self._build_metric_filters(run, metric, names, periods) + metric_name = metric + ("" if not names else ("+" + ",".join(names))) + metrics = await self.search( + "metric_desc", + filters=filters, + ignore_unavailable=True, + ) + if len(metrics["hits"]["hits"]) < 1: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Metric name {metric_name} not found for run {run}", + ) + classes = set() + response = {"label": metric, "class": classes} + breakouts = defaultdict(set) + pl = set() + for m in self._hits(metrics): + desc = m["metric_desc"] + response["type"] = desc["type"] + response["source"] = desc["source"] + if desc.get("class"): + classes.add(desc["class"]) + if "period" in m: + pl.add(m["period"]["id"]) + for n, v in desc["names"].items(): + breakouts[n].add(v) + # We want to help filter a consistent summary, so only show those + # names with more than one value. + if len(pl) > 1: + response["periods"] = pl + response["breakouts"] = {n: v for n, v in breakouts.items() if len(v) > 1} + duration = time.time() - start + print(f"Processing took {duration} seconds") + return response + + async def get_metrics_data( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + aggregate: bool = False, + ) -> list[Any]: + """Return a list of metric data + + The "aggregate" option allows aggregating various metrics across + breakout streams and periods: be careful, as this is meaningful only if + the breakout streams are sufficiently related. + + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + aggregate: aggregate multiple metric data streams + + Returns: + A sequence of data samples, showing the aggregate sample along with + the duration and end timestamp of each sample interval. + + [ + { + "begin": "2024-08-22 20:03:23.028000+00:00", + "end": "2024-08-22 20:03:37.127000+00:00", + "duration": 14.1, + "value": 9.35271216694379 + }, + { + "begin": "2024-08-22 20:03:37.128000+00:00", + "end": "2024-08-22 20:03:51.149000+00:00", + "duration": 14.022, + "value": 9.405932330557683 + }, + { + "begin": "2024-08-22 20:03:51.150000+00:00", + "end": "2024-08-22 20:04:05.071000+00:00", + "duration": 13.922, + "value": 9.478773265522682 + } + ] + """ + start = time.time() + ids = await self._get_metric_ids( + run, metric, names, periodlist=periods, aggregate=aggregate + ) + + # If we're searching by periods, filter metric data by the period + # timestamp range rather than just relying on the metric desc IDs as + # we also want to filter non-periodic tool data. + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(periods)) + + response = [] + if len(ids) > 1: + # Find the minimum sample interval of the selected metrics + aggdur = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={"duration": {"stats": {"field": "metric_data.duration"}}}, + ) + if aggdur["aggregations"]["duration"]["count"] > 0: + interval = int(aggdur["aggregations"]["duration"]["min"]) + data = await self.search( + index="metric_data", + size=0, + filters=filters, + aggregations={ + "interval": { + "histogram": { + "field": "metric_data.end", + "interval": interval, + }, + "aggs": {"value": {"sum": {"field": "metric_data.value"}}}, + } + }, + ) + for h in self._aggs(data, "interval"): + response.append( + { + "begin": self._format_timestamp(h["key"] - interval), + "end": self._format_timestamp(h["key"]), + "value": h["value"]["value"], + "duration": interval / 1000.0, + } + ) + else: + data = await self.search("metric_data", filters=filters) + for h in self._hits(data, ["metric_data"]): + response.append(self._format_data(h)) + response.sort(key=lambda a: a["end"]) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return response + + async def get_metrics_summary( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> dict[str, Any]: + """Return a statistical summary of metric data + + Provides a statistical summary of selected data samples. + + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + + Returns: + A statistical summary of the selected metric data + + { + "count": 71, + "min": 0.0, + "max": 0.3296, + "avg": 0.02360704225352113, + "sum": 1.676self.BIGQUERY00000001 + } + """ + start = time.time() + ids = await self._get_metric_ids(run, metric, names, periodlist=periods) + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(periods)) + data = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={"score": {"stats": {"field": "metric_data.value"}}}, + ) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return data["aggregations"]["score"] + + async def _graph_title( + self, + run_id: str, + run_id_list: list[str], + graph: Graph, + params_by_run: dict[str, Any], + periods_by_run: dict[str, Any], + ) -> str: + """Compute a default title for a graph + + Use the period, breakout name selections, run list, and iteration + parameters to construct a meaningful name for a graph. + + For example, "ilab::sdg-samples-sec (batch-size=4) {run 1}", or + "mpstat::Busy-CPU [cpu=4]" + + Args: + run_id: the Crucible run ID + run_id_list: ordered list of run IDs in our list of graphs + graph: the current Graph object + params_by_run: initially empty dict used to cache parameters + periods_by_run: initially empty dict used to cache periods + + Returns: + A string title + """ + names = graph.names + metric = graph.metric + if run_id not in params_by_run: + # Gather iteration parameters outside the loop for help in + # generating useful labels. + all_params = await self.search( + "param", filters=[{"term": {"run.id": run_id}}] + ) + collector = defaultdict(defaultdict) + for h in self._hits(all_params): + collector[h["iteration"]["id"]][h["param"]["arg"]] = h["param"]["val"] + params_by_run[run_id] = collector + else: + collector = params_by_run[run_id] + + if run_id not in periods_by_run: + periods = await self.search( + "period", filters=[{"term": {"run.id": run_id}}] + ) + iteration_periods = defaultdict(set) + for p in self._hits(periods): + iteration_periods[p["iteration"]["id"]].add(p["period"]["id"]) + periods_by_run[run_id] = iteration_periods + else: + iteration_periods = periods_by_run[run_id] + + # We can easily end up with multiple graphs across distinct + # periods or iterations, so we want to be able to provide some + # labeling to the graphs. We do this by looking for unique + # iteration parameters values, since the iteration number and + # period name aren't useful by themselves. + name_suffix = "" + if graph.periods: + iteration = None + for i, pset in iteration_periods.items(): + if set(graph.periods) <= pset: + iteration = i + break + + # If the period(s) we're graphing resolve to a single + # iteration in a run with multiple iterations, then we can + # try to find a unique title suffix based on distinct param + # values for that iteration. + if iteration and len(collector) > 1: + unique = collector[iteration].copy() + for i, params in collector.items(): + if i != iteration: + for p in list(unique.keys()): + if p in params and unique[p] == params[p]: + del unique[p] + if unique: + name_suffix = ( + " (" + ",".join([f"{p}={v}" for p, v in unique.items()]) + ")" + ) + + if len(run_id_list) > 1: + name_suffix += f" {{run {run_id_list.index(run_id) + 1}}}" + + options = (" [" + ",".join(names) + "]") if names else "" + return metric + options + name_suffix + + async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: + """Return metrics data for a run + + Each run may have multiple performance metrics stored. This API allows + retrieving graphable time-series representation of a metric over the + period of the run, in the format defined by Plotly as configuration + settings plus an x value array and a y value array. + + { + "data": [ + { + "x": [ + "2024-08-27 09:16:27.371000", + ... + ], + "y": [ + 10.23444312132161, + ... + ], + "name": "Metric ilab::train-samples-sec", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": {"x": "sample timestamp", "y": "samples / second"} + } + ] + "layout": { + "width": 1500, + "yaxis": { + "title": "mpstat::Busy-CPU core=2,package=0,num=112,type=usr", + "color": "black" + } + } + } + + Args: + graphdata: A GraphList object + + Returns: + A Plotly object with layout + """ + start = time.time() + graphlist = [] + default_run_id = graphdata.run + layout: dict[str, Any] = {"width": "1500"} + axes = {} + yaxis = None + cindex = 0 + params_by_run = {} + periods_by_run = {} + + # Construct a de-duped ordered list of run IDs, starting with the + # default. + run_id_list = [] + if default_run_id: + run_id_list.append(default_run_id) + run_id_missing = False + for g in graphdata.graphs: + if g.run: + if g.run not in run_id_list: + run_id_list.append(g.run) + else: + run_id_missing = True + + if run_id_missing and not default_run_id: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, "each graph request must have a run ID" + ) + + for g in graphdata.graphs: + run_id = g.run if g.run else default_run_id + names = g.names + metric: str = g.metric + + # The caller can provide a title for each graph; but, if not, we + # journey down dark overgrown pathways to fabricate a default with + # reasonable context, including unique iteration parameters, + # breakdown selections, and which run provided the data. + if g.title: + title = g.title + else: + title = await self._graph_title( + run_id, run_id_list, g, params_by_run, periods_by_run + ) + + ids = await self._get_metric_ids( + run_id, + metric, + names, + periodlist=g.periods, + aggregate=g.aggregate, + ) + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(g.periods)) + y_max = 0.0 + points: list[Point] = [] + + # If we're pulling multiple breakouts, e.g., total CPU across modes + # or cores, we want to aggregate by timestamp interval. Sample + # timstamps don't necessarily align, so the "histogram" aggregation + # normalizes within the interval (based on the minimum actual + # interval duration). + if len(ids) > 1: + # Find the minimum sample interval of the selected metrics + aggdur = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={ + "duration": {"stats": {"field": "metric_data.duration"}} + }, + ) + if aggdur["aggregations"]["duration"]["count"] > 0: + interval = int(aggdur["aggregations"]["duration"]["min"]) + data = await self.search( + index="metric_data", + size=0, + filters=filters, + aggregations={ + "interval": { + "histogram": { + "field": "metric_data.begin", + "interval": interval, + }, + "aggs": { + "value": {"sum": {"field": "metric_data.value"}} + }, + } + }, + ) + for h in self._aggs(data, "interval"): + begin = int(h["key"]) + end = begin + interval - 1 + points.append(Point(begin, end, float(h["value"]["value"]))) + else: + data = await self.search("metric_data", filters=filters) + for h in self._hits(data, ["metric_data"]): + points.append( + Point(int(h["begin"]), int(h["end"]), float(h["value"])) + ) + + # Sort the graph points by timestamp so that Ploty will draw nice + # lines. We graph both the "begin" and "end" timestamp of each + # sample against the value to more clearly show the sampling + # interval. + x = [] + y = [] + + for p in sorted(points, key=lambda a: a.begin): + x.extend( + [self._format_timestamp(p.begin), self._format_timestamp(p.end)] + ) + y.extend([p.value, p.value]) + y_max = max(y_max, p.value) + + if g.color: + color = g.color + else: + color = colors[cindex] + cindex += 1 + if cindex >= len(colors): + cindex = 0 + graphitem = { + "x": x, + "y": y, + "name": title, + "type": "scatter", + "mode": "line", + "marker": {"color": color}, + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + } + + # Y-axis scaling and labeling is divided by benchmark label; + # so store each we've created to reuse. (E.g., if we graph + # 5 different mpstat::Busy-CPU periods, they'll share a single + # Y axis.) + if metric in axes: + yref = axes[metric] + else: + if yaxis: + name = f"yaxis{yaxis}" + yref = f"y{yaxis}" + yaxis += 1 + layout[name] = { + "title": metric, + "color": color, + "autorange": True, + "anchor": "free", + "autoshift": True, + "overlaying": "y", + } + else: + name = "yaxis" + yref = "y" + yaxis = 2 + layout[name] = { + "title": metric, + "color": color, + } + axes[metric] = yref + graphitem["yaxis"] = yref + graphlist.append(graphitem) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return {"data": graphlist, "layout": layout} From 55f9c2bcab8d54b3b4fcabf8023c2cf2328ee988 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 17 Oct 2024 11:24:10 -0400 Subject: [PATCH 2/7] Add an `ilab` API module for InstructLab This builds on the `crucible_svc` layer in #122 to add a backend API. --- README.md | 7 +- backend/app/api/api.py | 5 + backend/app/api/v1/endpoints/ilab/ilab.py | 764 ++++++++++++++++++++++ backend/skeleton.toml | 5 + 4 files changed, 780 insertions(+), 1 deletion(-) create mode 100644 backend/app/api/v1/endpoints/ilab/ilab.py diff --git a/README.md b/README.md index 540ebdda..43b3ca21 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,11 @@ indice= username= password= +[.crucible] +url= +username= +password= + [ocp-server] port=8000 @@ -53,7 +58,7 @@ indice= username= password= ``` -**Note: The below applies only for the elastic search at the moment** +**Note: The below applies only for the elastic search at the moment** If you also have an archived internal instance that keeps track of older data, it can be specified with '.internal' suffix. Example of our `OCP` internal archived instance's configuration. ```toml [ocp.elasticsearch.internal] diff --git a/backend/app/api/api.py b/backend/app/api/api.py index b74b8ad4..c37735dc 100644 --- a/backend/app/api/api.py +++ b/backend/app/api/api.py @@ -1,3 +1,4 @@ +import sys from fastapi import APIRouter from app.api.v1.endpoints.ocp import results @@ -11,6 +12,7 @@ from app.api.v1.endpoints.telco import telcoJobs from app.api.v1.endpoints.telco import telcoGraphs from app.api.v1.endpoints.ocm import ocmJobs +from app.api.v1.endpoints.ilab import ilab router = APIRouter() @@ -39,3 +41,6 @@ # OCM endpoint router.include_router(ocmJobs.router, tags=['ocm']) + +# InstructLab endpoint +router.include_router(router=ilab.router, tags=['ilab']) diff --git a/backend/app/api/v1/endpoints/ilab/ilab.py b/backend/app/api/v1/endpoints/ilab/ilab.py new file mode 100644 index 00000000..a5b4b825 --- /dev/null +++ b/backend/app/api/v1/endpoints/ilab/ilab.py @@ -0,0 +1,764 @@ +"""Access RHEL AI InstructLab performance data through Crucible + +This defines an API to expose and filter performance data from InstructLab +CPT runs via a persistent Crucuble controller instance as defined in the +configuration path "ilab.crucible". +""" + +from datetime import datetime, timedelta, timezone +from typing import Annotated, Any, Optional + +from fastapi import APIRouter, Depends, Query + +from app.services.crucible_svc import CrucibleService, Graph, GraphList + +router = APIRouter() + + +CONFIGPATH = "ilab.crucible" + + +def example_response(response) -> dict[str, Any]: + return {"content": {"application/json": {"example": response}}} + + +def example_error(message: str) -> dict[str, Any]: + return example_response({"message": message}) + + +async def crucible_svc(): + crucible = None + try: + crucible = CrucibleService(CONFIGPATH) + yield crucible + finally: + if crucible: + await crucible.close() + + +@router.get( + "/api/v1/ilab/runs/filters", + summary="Returns possible filters", + description=( + "Returns a nested JSON object with all parameter and tag filter terms" + ), + responses={ + 200: example_response( + { + "param": { + "model": [ + "/home/models/granite-7b-redhat-lab", + "/home/models/granite-7b-lab/", + "/home/models/Mixtral-8x7B-Instruct-v0.1", + ], + "gpus": ["4"], + "workflow": ["train", "sdg", "train+eval"], + "data-path": [ + "/home/data/training/jun12-phase05.jsonl", + "/home/data/training/knowledge_data.jsonl", + "/home/data/training/jul19-knowledge-26k.jsonl", + "/home/data/jun12-phase05.jsonl", + ], + "nnodes": ["1"], + "train-until": ["checkpoint:1", "complete"], + "save-samples": ["5000", "2500", "10000"], + "deepspeed-cpu-offload-optimizer": ["0", "1"], + "deepspeed-cpu-offload-optimizer-pin-memory": ["0", "1"], + "batch-size": ["4", "8", "16", "12", "0"], + "cpu-offload-optimizer": ["1"], + "cpu-offload-pin-memory": ["1"], + "nproc-per-node": ["4"], + "num-runavg-samples": ["2", "6"], + "num-cpus": ["30"], + }, + "tag": {"topology": ["none"]}, + } + ) + }, +) +async def run_filters(crucible: Annotated[CrucibleService, Depends(crucible_svc)]): + return await crucible.get_run_filters() + + +@router.get( + "/api/v1/ilab/runs", + summary="Returns a list of InstructLab runs", + description="Returns a list of runs summary documents.", + responses={ + 200: example_response( + { + "results": [ + { + "benchmark": "ilab", + "email": "rhel-ai-user@example.com", + "id": "bd72561c-cc20-400b-b6f6-d9534a60033a", + "name": '"RHEL-AI User"', + "source": "n42-h01-b01-mx750c.example.com//var/lib/crucible/run/ilab--2024-09-11_19:43:53_UTC--bd72561c-cc20-400b-b6f6-d9534a60033a", + "status": "pass", + "begin_date": "1970-01-01 00:00:00+00:00", + "end_date": "1970-01-01 00:00:00+00:00", + "params": { + "gpus": "4", + "model": "/home/models/Mixtral-8x7B-Instruct-v0.1", + "workflow": "sdg", + }, + "iterations": [ + { + "iteration": 1, + "primary_metric": "ilab::sdg-samples-sec", + "primary_period": "measurement", + "status": "pass", + "params": { + "gpus": "4", + "model": "/home/models/Mixtral-8x7B-Instruct-v0.1", + "workflow": "sdg", + }, + } + ], + "primary_metrics": ["ilab::sdg-samples-sec"], + "tags": {"topology": "none"}, + } + ], + "count": 5, + "total": 21, + "startDate": "2024-08-19 20:42:52.239000+00:00", + "endDate": "2024-09-18 20:42:52.239000+00:00", + } + ), + 400: example_error( + "sort key 'bad' must be one of begin,benchmark,email,end,id,name,source,status" + ), + 422: example_error( + "invalid date format, start_date must be less than end_date" + ), + }, +) +async def runs( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + start_date: Annotated[ + Optional[str], + Query(description="Start time for search", examples=["2020-11-10"]), + ] = None, + end_date: Annotated[ + Optional[str], + Query(description="End time for search", examples=["2020-11-10"]), + ] = None, + filter: Annotated[ + Optional[list[str]], + Query( + description="Filter terms", examples=["tag:name=value", "param:name=value"] + ), + ] = None, + sort: Annotated[ + Optional[list[str]], + Query(description="Sort terms", examples=["start:asc", "status:desc"]), + ] = None, + size: Annotated[ + Optional[int], Query(description="Number of runs in a page", examples=[10]) + ] = None, + offset: Annotated[ + int, + Query(description="Page offset to start", examples=[10]), + ] = 0, +): + if start_date is None and end_date is None: + now = datetime.now(timezone.utc) + start = now - timedelta(days=30) + end = now + else: + start = start_date + end = end_date + return await crucible.get_runs( + start=start, end=end, filter=filter, sort=sort, size=size, offset=offset + ) + + +@router.get( + "/api/v1/ilab/runs/{run}/tags", + summary="Returns the Crucible tags for a run", + description="Returns tags for a specified Run ID.", + responses={ + 200: example_response({"topology": "none"}), + 400: example_error("Parameter error"), + }, +) +async def tags(crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str): + return await crucible.get_tags(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/params", + summary="Returns the InstructLab parameters for a run", + description="Returns params for a specified Run ID by iteration plus common params.", + responses={ + 200: example_response( + { + "9D5AB7D6-510A-11EF-84ED-CCA69E6B5B5B": { + "num-runavg-samples": "2", + "cpu-offload-pin-memory": "1", + "nnodes": "1", + "cpu-offload-optimizer": "1", + "data-path": "/home/data/training/knowledge_data.jsonl", + "model": "/home/models/granite-7b-lab/", + "nproc-per-node": "4", + }, + "common": { + "num-runavg-samples": "2", + "cpu-offload-pin-memory": "1", + "nnodes": "1", + "cpu-offload-optimizer": "1", + "data-path": "/home/data/training/knowledge_data.jsonl", + "model": "/home/models/granite-7b-lab/", + "nproc-per-node": "4", + }, + } + ), + 400: example_error("Parameter error"), + }, +) +async def params(crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str): + return await crucible.get_params(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/iterations", + summary="Returns a list of InstructLab run iterations", + description="Returns a list of iterations for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "id": "6B98F650-7139-11EF-BB69-98B53E962BD1", + "num": 2, + "path": None, + "primary-metric": "ilab::sdg-samples-sec", + "primary-period": "measurement", + "status": "pass", + }, + { + "id": "6B99173E-7139-11EF-9434-F8BB3B1B9CFC", + "num": 5, + "path": None, + "primary-metric": "ilab::sdg-samples-sec", + "primary-period": "measurement", + "status": "pass", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def iterations( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_iterations(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/samples", + summary="Returns a list of InstructLab run samples", + description="Returns a list of samples for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "id": "6BBE6872-7139-11EF-BFAA-8569A9399D61", + "num": "1", + "path": None, + "status": "pass", + "iteration": 5, + "primary_metric": "ilab::sdg-samples-sec", + }, + { + "id": "6BACDFA8-7139-11EF-9F33-8185DD5B4869", + "num": "1", + "path": None, + "status": "pass", + "iteration": 2, + "primary_metric": "ilab::sdg-samples-sec", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def run_samples( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_samples(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/periods", + summary="Returns a list of InstructLab run periods", + description="Returns a list of periods for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "begin": "2024-09-12 17:40:27.982000+00:00", + "end": "2024-09-12 18:03:23.132000+00:00", + "id": "6BA57EF2-7139-11EF-A80B-E5037504B9B1", + "name": "measurement", + "iteration": 1, + "sample": "1", + "primary_metric": "ilab::sdg-samples-sec", + "status": "pass", + }, + { + "begin": "2024-09-12 18:05:03.229000+00:00", + "end": "2024-09-12 18:27:55.419000+00:00", + "id": "6BB93622-7139-11EF-A6C0-89A48E630F9D", + "name": "measurement", + "iteration": 4, + "sample": "1", + "primary_metric": "ilab::sdg-samples-sec", + "status": "pass", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def run_periods( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_periods(run) + + +@router.get( + "/api/v1/ilab/iterations/{iteration}/samples", + summary="Returns a list of InstructLab iteration samples", + description="Returns a list of iterations for a specified iteration ID.", + responses={ + 200: example_response( + [ + { + "id": "6BBE6872-7139-11EF-BFAA-8569A9399D61", + "num": "1", + "path": None, + "status": "pass", + "iteration": 5, + "primary_metric": "ilab::sdg-samples-sec", + }, + { + "id": "6BACDFA8-7139-11EF-9F33-8185DD5B4869", + "num": "1", + "path": None, + "status": "pass", + "iteration": 2, + "primary_metric": "ilab::sdg-samples-sec", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def iteration_samples( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], iteration: str +): + return await crucible.get_samples(iteration=iteration) + + +@router.get( + "/api/v1/ilab/runs/{run}/timeline", + summary="Returns the 'timeline' of a run", + description="Describes the sequence of iterations, samples, and periods.", + responses={ + 200: example_response( + { + "run": { + "id": "70d3b53f-c588-49a3-91c2-7fcf3927be7e", + "iterations": [ + { + "id": "BFC16DA6-60C8-11EF-AB10-CF940109872B", + "num": 1, + "path": None, + "primary-metric": "ilab::train-samples-sec", + "primary-period": "measurement", + "status": "pass", + "samples": [ + { + "id": "C021BECC-60C8-11EF-A619-E0BC70D6C320", + "num": "1", + "path": None, + "status": "pass", + "periods": [ + { + "begin": "2024-08-22 19:09:08.642000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + "id": "C022CDC6-60C8-11EF-BA80-AFE7B4B2692B", + "name": "measurement", + } + ], + } + ], + } + ], + "begin": "2024-08-22 19:09:08.642000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + } + } + ), + 400: example_error("Parameter error"), + }, +) +async def timeline( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_timeline(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/metrics", + summary="Describe the metrics collected for a run", + description="Returns metric labels along with breakout names and values.", + responses={ + 200: example_response( + { + "sar-net::packets-sec": { + "periods": [], + "breakouts": { + "benchmark-name": ["none"], + "benchmark-role": ["none"], + "csid": ["remotehosts-1-sysstat-1"], + "cstype": ["profiler"], + "dev": ["lo", "eno8303", "eno12399", "eno12409"], + "direction": ["rx", "tx"], + "endpoint-label": ["remotehosts-1"], + "engine-id": ["remotehosts-1-sysstat-1"], + "engine-role": ["profiler"], + "engine-type": ["profiler"], + "hosted-by": ["x.example.com"], + "hostname": ["x.example.com"], + "hypervisor-host": ["none"], + "osruntime": ["podman"], + "tool-name": ["sysstat"], + "type": ["virtual", "physical"], + "userenv": ["rhel-ai"], + }, + }, + }, + ), + 400: example_error("Parameter error"), + }, +) +async def metrics( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_metrics_list(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/breakouts/{metric}", + summary="Returns breakout options for a metric", + description="Describes the breakout names and available values for a run.", + responses={ + 200: example_response( + { + "label": "mpstat::Busy-CPU", + "class": ["throughput"], + "type": "Busy-CPU", + "source": "mpstat", + "breakouts": {"num": ["8", "72"], "thread": [0, 1]}, + } + ), + 400: example_error("Metric name not found for run "), + }, +) +async def metric_breakouts( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, +): + return await crucible.get_metric_breakouts(run, metric, names=name, periods=period) + + +@router.get( + "/api/v1/ilab/runs/{run}/data/{metric}", + summary="Returns metric data collected for a run", + description="Returns data collected for a specified Run ID metric.", + responses={ + 200: example_response( + [ + { + "begin": "2024-08-22 20:04:05.072000+00:00", + "end": "2024-08-22 20:04:19.126000+00:00", + "duration": 14.055, + "value": 9.389257233311497, + }, + { + "begin": "2024-08-22 20:04:19.127000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + "duration": 13.763, + "value": 9.552584444155011, + }, + ] + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_data( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, +): + return await crucible.get_metrics_data( + run, metric, names=name, periods=period, aggregate=aggregate + ) + + +@router.get( + "/api/v1/ilab/runs/{run}/summary/{metric}", + summary="Returns metric data collected for a run", + description="Returns data collected for a specified Run ID metric.", + responses={ + 200: example_response( + { + "count": 234, + "min": 7.905045031896648, + "max": 9.666444615077308, + "avg": 9.38298722585416, + "sum": 2195.6190108498736, + } + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_summary( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, +): + return await crucible.get_metrics_summary(run, metric, names=name, periods=period) + + +@router.post( + "/api/v1/ilab/runs/multigraph", + summary="Returns overlaid Plotly graph objects", + description="Returns metric data in a form usable by the Plot React component.", + responses={ + 200: example_response( + response={ + "data": [ + { + "x": [ + "2024-09-05 21:50:07+00:00", + "2024-09-05 21:56:37+00:00", + "2024-09-05 21:56:37.001000+00:00", + "2024-09-05 21:56:52+00:00", + "2024-09-05 21:56:52.001000+00:00", + "2024-09-05 22:01:52+00:00", + ], + "y": [0.0, 0.0, 0.33, 0.33, 0.0, 0.0], + "name": "iostat::operations-merged-sec [cmd=read,dev=sdb]", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": {"x": "sample timestamp", "y": "samples / second"}, + "yaxis": "y", + }, + { + "x": [ + "2024-09-05 21:50:07+00:00", + "2024-09-05 21:56:37+00:00", + "2024-09-05 21:56:37.001000+00:00", + "2024-09-05 21:56:52+00:00", + "2024-09-05 21:56:52.001000+00:00", + "2024-09-05 22:01:52+00:00", + ], + "y": [0.0, 0.0, 0.33, 0.33, 0.0, 0.0], + "name": "iostat::operations-merged-sec [dev=sdb,cmd=read]", + "type": "scatter", + "mode": "line", + "marker": {"color": "purple"}, + "labels": {"x": "sample timestamp", "y": "samples / second"}, + "yaxis": "y", + }, + ], + "layout": { + "width": "1500", + "yaxis": { + "title": "iostat::operations-merged-sec", + "color": "black", + }, + }, + } + ), + 400: example_error("No matches for ilab::train-samples-sec"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_graph_body( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], graphs: GraphList +): + return await crucible.get_metrics_graph(graphs) + + +@router.get( + "/api/v1/ilab/runs/{run}/graph/{metric}", + summary="Returns a single Plotly graph object for a run", + description="Returns metric data in a form usable by the Plot React component.", + responses={ + 200: example_response( + response={ + "data": [ + { + "x": [ + "2024-09-12 16:49:01+00:00", + "2024-09-12 18:04:31+00:00", + "2024-09-12 18:04:31.001000+00:00", + "2024-09-12 18:04:46+00:00", + "2024-09-12 18:04:46.001000+00:00", + "2024-09-12 18:53:16+00:00", + ], + "y": [0.0, 0.0, 1.4, 1.4, 0.0, 0.0], + "name": "iostat::operations-merged-sec [cmd=read,dev=sda]", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + "yaxis": "y", + } + ], + "layout": { + "width": "1500", + "yaxis": { + "title": "iostat::operations-merged-sec", + "color": "black", + }, + }, + } + ), + 400: example_error("No matches for ilab::train-samples-sec"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_graph_param( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, + title: Annotated[Optional[str], Query(description="Title for graph")] = None, +): + return await crucible.get_metrics_graph( + GraphList( + run=run, + name=metric, + graphs=[ + Graph( + metric=metric, + aggregate=aggregate, + names=name, + periods=period, + title=title, + ) + ], + ) + ) diff --git a/backend/skeleton.toml b/backend/skeleton.toml index 81662a55..2aac4574 100644 --- a/backend/skeleton.toml +++ b/backend/skeleton.toml @@ -15,3 +15,8 @@ personal_access_token= url= username= password= + +[crucible] +url= +username= +password= From 176ddee15f4ff6cc3e10f96f39fa019e8e7d612a Mon Sep 17 00:00:00 2001 From: MVarshini Date: Thu, 17 Oct 2024 16:15:21 -0400 Subject: [PATCH 3/7] Add an ilab UI tab This relies on the ilab API in #123, which in turn builds on the crucible service in #122. --- frontend/README.md | 16 +- frontend/src/App.js | 58 ----- frontend/src/App.jsx | 2 + frontend/src/actions/filterActions.js | 4 + frontend/src/actions/ilabActions.js | 213 ++++++++++++++++ frontend/src/actions/paginationActions.js | 18 ++ frontend/src/actions/types.js | 12 + .../src/assets/constants/SidemenuConstants.js | 1 + .../molecules/ExpandedRow/index.jsx | 2 +- .../molecules/SideMenuOptions/index.jsx | 5 + .../components/organisms/Pagination/index.jsx | 11 + .../organisms/TableFilters/index.jsx | 3 +- .../components/templates/ILab/ILabGraph.jsx | 44 ++++ .../src/components/templates/ILab/MetaRow.jsx | 40 +++ .../templates/ILab/MetricsDropdown.jsx | 86 +++++++ .../components/templates/ILab/StatusCell.jsx | 24 ++ .../src/components/templates/ILab/index.jsx | 241 ++++++++++++++++++ .../src/components/templates/ILab/index.less | 13 + frontend/src/reducers/ilabReducer.js | 61 +++++ frontend/src/reducers/index.js | 2 + frontend/src/reducers/loadingReducer.js | 2 +- frontend/src/store/reducers/InitialData.js | 181 ------------- frontend/src/store/reducers/index.js | 18 -- frontend/src/utils/apiConstants.js | 5 +- frontend/src/utils/routeConstants.js | 1 + 25 files changed, 794 insertions(+), 269 deletions(-) delete mode 100644 frontend/src/App.js create mode 100644 frontend/src/actions/ilabActions.js create mode 100644 frontend/src/components/templates/ILab/ILabGraph.jsx create mode 100644 frontend/src/components/templates/ILab/MetaRow.jsx create mode 100644 frontend/src/components/templates/ILab/MetricsDropdown.jsx create mode 100644 frontend/src/components/templates/ILab/StatusCell.jsx create mode 100644 frontend/src/components/templates/ILab/index.jsx create mode 100644 frontend/src/components/templates/ILab/index.less create mode 100644 frontend/src/reducers/ilabReducer.js delete mode 100644 frontend/src/store/reducers/InitialData.js delete mode 100644 frontend/src/store/reducers/index.js diff --git a/frontend/README.md b/frontend/README.md index 0b01bbaf..99101f03 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -21,21 +21,21 @@ The `utils` directory has all helper/utility scripts. #### [`src/reducers`](src/reducers) -Contains functions that manage store via actions +Contains functions that manage store via actions -## Cloning and Running the Application Locally +## Cloning and Running the Application Locally -- Install [Node.js](https://nodejs.org) +- Install [Node.js](https://nodejs.org) - Clone the [CPT Dashboard code](https://github.com/cloud-bulldozer/cpt-dashboard) to a local file system - Install all the npm packages -Type the following command to install all npm packages +Type the following command to install all npm packages ```bash $ npm install ``` -In order to run the application use the following command +In order to run the application use the following command ```bash $ npm run dev @@ -56,12 +56,12 @@ Then, copy the `build` folder to the proper place on the server for deployment. ## Template -This application is based on v5 of PatternFly which is a production-ready UI solution for admin interfaces. For more information regarding the foundation and template of the application, please visit [PatternFly](https://www.patternfly.org/get-started/develop) +This application is based on v5 of PatternFly which is a production-ready UI solution for admin interfaces. For more information regarding the foundation and template of the application, please visit [PatternFly](https://www.patternfly.org/get-started/develop) ## Resources -- [Vite](https://vitejs.dev/guide/) +- [Vite](https://vitejs.dev/guide/) -- [ReactJS](https://reactjs.org/) +- [ReactJS](https://reactjs.org/) - [React-Redux](https://github.com/reduxjs/react-redux) diff --git a/frontend/src/App.js b/frontend/src/App.js deleted file mode 100644 index 4b8c6382..00000000 --- a/frontend/src/App.js +++ /dev/null @@ -1,58 +0,0 @@ -import React, {useEffect} from 'react'; -import '@patternfly/react-core/dist/styles/base.css'; - -import { - Page, - PageSection, - PageSectionVariants, -} from '@patternfly/react-core'; -import {fetchOCPJobsData, fetchCPTJobsData, fetchQuayJobsData, fetchTelcoJobsData} from "./store/Actions/ActionCreator"; -import {useDispatch} from "react-redux"; -import {Route, Switch, BrowserRouter as Router} from "react-router-dom"; -import {NavBar} from "./components/NavBar/NavBar"; -import {HomeView} from "./components/Home/HomeView"; -import {OCPHome} from './components/OCP/OCPHome'; -import {QuayHome} from './components/Quay/QuayHome'; -import {TelcoHome} from './components/Telco/TelcoHome'; - - -export const App = () => { - const dispatch = useDispatch() - - useEffect(() => { - const fetchData = async () =>{ - await dispatch(fetchOCPJobsData()) - await dispatch(fetchCPTJobsData()) - await dispatch(fetchQuayJobsData()) - await dispatch(fetchTelcoJobsData()) - } - fetchData() - }, [dispatch]) - - - - - return ( - - } - groupProps={{ - stickyOnBreakpoint: { default: 'top' }, - sticky: 'top' - }} - > - - - - - - - - - - - - ); -}; - -export default App diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index c5f48549..d93c960e 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -5,6 +5,7 @@ import * as APP_ROUTES from "./utils/routeConstants"; import { BrowserRouter, Route, Routes } from "react-router-dom"; import Home from "./components/templates/Home"; +import ILab from "./components/templates/ILab"; import MainLayout from "./containers/MainLayout"; import OCP from "./components/templates/OCP"; import Quay from "./components/templates/Quay"; @@ -26,6 +27,7 @@ function App() { } /> } /> } /> + } /> diff --git a/frontend/src/actions/filterActions.js b/frontend/src/actions/filterActions.js index 7f565887..f8fa5691 100644 --- a/frontend/src/actions/filterActions.js +++ b/frontend/src/actions/filterActions.js @@ -1,3 +1,4 @@ +import { fetchILabJobs, setIlabDateFilter } from "./ilabActions"; import { removeCPTAppliedFilters, setCPTAppliedFilters, @@ -76,6 +77,9 @@ export const setDateFilter = (date, key, navigation, currType) => { dispatch(setQuayDateFilter(date, key, navigation)); } else if (currType === "telco") { dispatch(setTelcoDateFilter(date, key, navigation)); + } else if (currType === "ilab") { + dispatch(setIlabDateFilter(date, key, navigation)); + dispatch(fetchILabJobs()); } }; diff --git a/frontend/src/actions/ilabActions.js b/frontend/src/actions/ilabActions.js new file mode 100644 index 00000000..1152846b --- /dev/null +++ b/frontend/src/actions/ilabActions.js @@ -0,0 +1,213 @@ +import * as API_ROUTES from "@/utils/apiConstants"; +import * as TYPES from "./types.js"; + +import API from "@/utils/axiosInstance"; +import { appendQueryString } from "@/utils/helper"; +import { cloneDeep } from "lodash"; +import { showFailureToast } from "@/actions/toastActions"; + +export const fetchILabJobs = + (shouldStartFresh = false) => + async (dispatch, getState) => { + try { + dispatch({ type: TYPES.LOADING }); + const { start_date, end_date, size, offset, results } = getState().ilab; + const response = await API.get(API_ROUTES.ILABS_JOBS_API_V1, { + params: { + ...(start_date && { start_date }), + ...(end_date && { end_date }), + ...(size && { size }), + ...(offset && { offset }), + }, + }); + if (response.status === 200 && response?.data?.results.length > 0) { + const startDate = response.data.startDate, + endDate = response.data.endDate; + dispatch({ + type: TYPES.SET_ILAB_JOBS_DATA, + payload: shouldStartFresh + ? response.data.results + : [...results, ...response.data.results], + }); + + dispatch({ + type: TYPES.SET_ILAB_DATE_FILTER, + payload: { + start_date: startDate, + end_date: endDate, + }, + }); + + dispatch({ + type: TYPES.SET_ILAB_TOTAL_ITEMS, + payload: response.data.total, + }); + dispatch({ + type: TYPES.SET_ILAB_OFFSET, + payload: response.data.next_offset, + }); + + dispatch(tableReCalcValues()); + } + } catch (error) { + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); + }; +export const sliceIlabTableRows = + (startIdx, endIdx) => (dispatch, getState) => { + const results = [...getState().ilab.results]; + + dispatch({ + type: TYPES.SET_ILAB_INIT_JOBS, + payload: results.slice(startIdx, endIdx), + }); + }; +export const setIlabDateFilter = + (start_date, end_date, navigate) => (dispatch, getState) => { + const appliedFilters = getState().ilab.appliedFilters; + + dispatch({ + type: TYPES.SET_ILAB_DATE_FILTER, + payload: { + start_date, + end_date, + }, + }); + + appendQueryString({ ...appliedFilters, start_date, end_date }, navigate); + }; + +export const fetchMetricsInfo = (uid) => async (dispatch) => { + try { + dispatch({ type: TYPES.LOADING }); + const response = await API.get(`/api/v1/ilab/runs/${uid}/metrics`); + if (response.status === 200) { + if ( + response.data.constructor === Object && + Object.keys(response.data).length > 0 + ) { + dispatch({ + type: TYPES.SET_ILAB_METRICS, + payload: { uid, metrics: Object.keys(response.data) }, + }); + } + } + } catch (error) { + console.error(error); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); +}; + +export const fetchPeriods = (uid) => async (dispatch) => { + try { + dispatch({ type: TYPES.LOADING }); + const response = await API.get(`/api/v1/ilab/runs/${uid}/periods`); + if (response.status === 200) { + dispatch({ + type: TYPES.SET_ILAB_PERIODS, + payload: { uid, periods: response.data }, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); +}; + +export const fetchGraphData = + (uid, metric, primary_metric) => async (dispatch, getState) => { + try { + const periods = getState().ilab.periods.find((i) => i.uid == uid); + const graphData = cloneDeep(getState().ilab.graphData); + const filterData = graphData.filter((i) => i.uid !== uid); + dispatch({ + type: TYPES.SET_ILAB_GRAPH_DATA, + payload: filterData, + }); + const copyData = cloneDeep(filterData); + dispatch({ type: TYPES.GRAPH_LOADING }); + let graphs = []; + periods?.periods?.forEach((p) => { + graphs.push({ metric: p.primary_metric, periods: [p.id] }); + graphs.push({ + metric, + aggregate: true, + periods: [p.id], + }); + }); + const response = await API.post(`/api/v1/ilab/runs/multigraph`, { + run: uid, + name: primary_metric, + graphs, + }); + if (response.status === 200) { + copyData.push({ + uid, + data: response.data.data, + layout: response.data.layout, + }); + dispatch({ + type: TYPES.SET_ILAB_GRAPH_DATA, + payload: copyData, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.GRAPH_COMPLETED }); + }; + +export const setIlabPage = (pageNo) => ({ + type: TYPES.SET_ILAB_PAGE, + payload: pageNo, +}); + +export const setIlabPageOptions = (page, perPage) => ({ + type: TYPES.SET_ILAB_PAGE_OPTIONS, + payload: { page, perPage }, +}); + +export const checkIlabJobs = (newPage) => (dispatch, getState) => { + const results = cloneDeep(getState().ilab.results); + const { totalItems, perPage } = getState().ilab; + + const startIdx = (newPage - 1) * perPage; + const endIdx = newPage * perPage; + + if ( + (typeof results[startIdx] === "undefined" || + typeof results[endIdx] === "undefined") && + results.length < totalItems + ) { + dispatch(fetchILabJobs()); + } +}; + +export const setSelectedMetrics = (id, metrics) => (dispatch, getState) => { + const metrics_selected = cloneDeep(getState().ilab.metrics_selected); + metrics_selected[id] = metrics; + dispatch({ + type: TYPES.SET_ILAB_SELECTED_METRICS, + payload: metrics_selected, + }); +}; + +export const tableReCalcValues = () => (dispatch, getState) => { + const { page, perPage } = getState().ilab; + + const startIdx = page !== 1 ? (page - 1) * perPage : 0; + const endIdx = page !== 1 ? page * perPage - 1 : perPage; + dispatch(sliceIlabTableRows(startIdx, endIdx)); +}; diff --git a/frontend/src/actions/paginationActions.js b/frontend/src/actions/paginationActions.js index 80a7dff1..1717a82a 100644 --- a/frontend/src/actions/paginationActions.js +++ b/frontend/src/actions/paginationActions.js @@ -3,9 +3,17 @@ import { setCPTPageOptions, sliceCPTTableRows, } from "./homeActions"; +import { + setIlabPage, + setIlabPageOptions, + sliceIlabTableRows, +} from "./ilabActions"; import { setOCPPage, setOCPPageOptions, sliceOCPTableRows } from "./ocpActions"; import { setQuayPage, setQuayPageOptions } from "./quayActions"; import { setTelcoPage, setTelcoPageOptions } from "./telcoActions"; + +import { checkIlabJobs } from "./ilabActions"; + export const setPage = (newPage, currType) => (dispatch) => { if (currType === "cpt") { dispatch(setCPTPage(newPage)); @@ -15,6 +23,8 @@ export const setPage = (newPage, currType) => (dispatch) => { dispatch(setQuayPage(newPage)); } else if (currType === "telco") { dispatch(setTelcoPage(newPage)); + } else if (currType === "ilab") { + dispatch(setIlabPage(newPage)); } }; @@ -27,6 +37,8 @@ export const setPageOptions = (newPage, newPerPage, currType) => (dispatch) => { dispatch(setQuayPageOptions(newPage, newPerPage)); } else if (currType === "telco") { dispatch(setTelcoPageOptions(newPage, newPerPage)); + } else if (currType === "ilab") { + dispatch(setIlabPageOptions(newPage, newPerPage)); } }; @@ -35,5 +47,11 @@ export const sliceTableRows = (startIdx, endIdx, currType) => (dispatch) => { dispatch(sliceCPTTableRows(startIdx, endIdx)); } else if (currType === "ocp") { dispatch(sliceOCPTableRows(startIdx, endIdx)); + } else if (currType === "ilab") { + dispatch(sliceIlabTableRows(startIdx, endIdx)); } }; + +export const fetchNextJobs = (newPage) => (dispatch) => { + dispatch(checkIlabJobs(newPage)); +}; diff --git a/frontend/src/actions/types.js b/frontend/src/actions/types.js index 1804cf21..58d7506f 100644 --- a/frontend/src/actions/types.js +++ b/frontend/src/actions/types.js @@ -77,3 +77,15 @@ export const SET_TELCO_SELECTED_FILTERS = "SET_TELCO_SELECTED_FILTERS"; export const SET_TELCO_SUMMARY = "SET_TELCO_SUMMARY"; export const SET_TELCO_COLUMNS = "SET_TELCO_COLUMNS"; export const SET_TELCO_GRAPH_DATA = "SET_TELCO_GRAPH_DATA"; +/* ILAB JOBS */ +export const SET_ILAB_JOBS_DATA = "SET_ILAB_JOBS_DATA"; +export const SET_ILAB_DATE_FILTER = "SET_ILAB_DATE_FILTER"; +export const SET_ILAB_GRAPH_DATA = "SET_ILAB_GRAPH_DATA"; +export const SET_ILAB_TOTAL_ITEMS = "SET_ILAB_TOTAL_ITEMS"; +export const SET_ILAB_OFFSET = "SET_ILAB_OFFSET"; +export const SET_ILAB_PAGE = "SET_ILAB_PAGE"; +export const SET_ILAB_PAGE_OPTIONS = "SET_ILAB_PAGE_OPTIONS"; +export const SET_ILAB_METRICS = "SET_ILAB_METRICS"; +export const SET_ILAB_SELECTED_METRICS = "SET_ILAB_SELECTED_METRICS"; +export const SET_ILAB_PERIODS = "SET_ILAB_PERIODS"; +export const SET_ILAB_INIT_JOBS = "SET_ILAB_INIT_JOBS"; diff --git a/frontend/src/assets/constants/SidemenuConstants.js b/frontend/src/assets/constants/SidemenuConstants.js index bc04fd52..e65a2103 100644 --- a/frontend/src/assets/constants/SidemenuConstants.js +++ b/frontend/src/assets/constants/SidemenuConstants.js @@ -2,3 +2,4 @@ export const HOME_NAV = "home"; export const QUAY_NAV = "quay"; export const OCP_NAV = "ocp"; export const TELCO_NAV = "telco"; +export const ILAB_NAV = "ilab"; diff --git a/frontend/src/components/molecules/ExpandedRow/index.jsx b/frontend/src/components/molecules/ExpandedRow/index.jsx index 981d5660..8fcc2d48 100644 --- a/frontend/src/components/molecules/ExpandedRow/index.jsx +++ b/frontend/src/components/molecules/ExpandedRow/index.jsx @@ -42,7 +42,7 @@ const RowContent = (props) => { }, []); return ( - + {content.map((unit) => ( diff --git a/frontend/src/components/molecules/SideMenuOptions/index.jsx b/frontend/src/components/molecules/SideMenuOptions/index.jsx index 48bed8de..17a00160 100644 --- a/frontend/src/components/molecules/SideMenuOptions/index.jsx +++ b/frontend/src/components/molecules/SideMenuOptions/index.jsx @@ -28,6 +28,11 @@ const sideMenuOptions = [ key: "telco", displayName: "Telco", }, + { + id: CONSTANTS.ILAB_NAV, + key: "ilab", + displayName: "ILAB", + }, ]; const MenuOptions = () => { diff --git a/frontend/src/components/organisms/Pagination/index.jsx b/frontend/src/components/organisms/Pagination/index.jsx index 7b316a21..deb8d8fe 100644 --- a/frontend/src/components/organisms/Pagination/index.jsx +++ b/frontend/src/components/organisms/Pagination/index.jsx @@ -1,5 +1,6 @@ import { Pagination, PaginationVariant } from "@patternfly/react-core"; import { + fetchNextJobs, setPage, setPageOptions, sliceTableRows, @@ -13,6 +14,7 @@ const RenderPagination = (props) => { const dispatch = useDispatch(); const perPageOptions = [ + { title: "10", value: 10 }, { title: "25", value: 25 }, { title: "50", value: 50 }, { title: "100", value: 100 }, @@ -21,6 +23,7 @@ const RenderPagination = (props) => { const onSetPage = useCallback( (_evt, newPage, _perPage, startIdx, endIdx) => { dispatch(setPage(newPage, props.type)); + dispatch(sliceTableRows(startIdx, endIdx, props.type)); }, [dispatch, props.type] @@ -28,11 +31,17 @@ const RenderPagination = (props) => { const onPerPageSelect = useCallback( (_evt, newPerPage, newPage, startIdx, endIdx) => { dispatch(setPageOptions(newPage, newPerPage, props.type)); + dispatch(sliceTableRows(startIdx, endIdx, props.type)); }, [dispatch, props.type] ); + const checkAndFetch = (_evt, newPage) => { + if (props.type === "ilab") { + dispatch(fetchNextJobs(newPage)); + } + }; return ( { perPage={props.perPage} page={props.page} variant={PaginationVariant.bottom} + onNextClick={checkAndFetch} perPageOptions={perPageOptions} onSetPage={onSetPage} onPerPageSelect={onPerPageSelect} + onPageInput={checkAndFetch} /> ); }; diff --git a/frontend/src/components/organisms/TableFilters/index.jsx b/frontend/src/components/organisms/TableFilters/index.jsx index c5f5ae62..0dd5885d 100644 --- a/frontend/src/components/organisms/TableFilters/index.jsx +++ b/frontend/src/components/organisms/TableFilters/index.jsx @@ -124,7 +124,8 @@ const TableFilter = (props) => { )} - {Object.keys(appliedFilters).length > 0 && + {appliedFilters && + Object.keys(appliedFilters).length > 0 && Object.keys(appliedFilters).map((key) => ( {getFilterName(key)} : diff --git a/frontend/src/components/templates/ILab/ILabGraph.jsx b/frontend/src/components/templates/ILab/ILabGraph.jsx new file mode 100644 index 00000000..c41300ba --- /dev/null +++ b/frontend/src/components/templates/ILab/ILabGraph.jsx @@ -0,0 +1,44 @@ +import Plot from "react-plotly.js"; +import PropType from "prop-types"; +import { cloneDeep } from "lodash"; +import { uid } from "@/utils/helper"; +import { useSelector } from "react-redux"; + +const ILabGraph = (props) => { + const { item } = props; + const isGraphLoading = useSelector((state) => state.loading.isGraphLoading); + const { graphData } = useSelector((state) => state.ilab); + + const graphDataCopy = cloneDeep(graphData); + + const getGraphData = (id) => { + const data = graphDataCopy?.filter((a) => a.uid === id); + return data; + }; + const hasGraphData = (uuid) => { + const hasData = getGraphData(uuid).length > 0; + + return hasData; + }; + + return ( + <> + {hasGraphData(item.id) ? ( + + ) : isGraphLoading && !hasGraphData(item.id) ? ( +
+ ) : ( + <> + )} + + ); +}; + +ILabGraph.propTypes = { + item: PropType.object, +}; +export default ILabGraph; diff --git a/frontend/src/components/templates/ILab/MetaRow.jsx b/frontend/src/components/templates/ILab/MetaRow.jsx new file mode 100644 index 00000000..c196e79f --- /dev/null +++ b/frontend/src/components/templates/ILab/MetaRow.jsx @@ -0,0 +1,40 @@ +import { Table, Tbody, Th, Thead, Tr } from "@patternfly/react-table"; + +import Proptypes from "prop-types"; +import { Title } from "@patternfly/react-core"; +import { uid } from "@/utils/helper"; + +const MetaRow = (props) => { + const { metadata, heading } = props; + return ( + <> + + {heading} + + + + + + + + + + {metadata.map((item) => ( + + + + + ))} + +
+ Key + Value
{item[0]}{item[1]}
+ + ); +}; + +MetaRow.propTypes = { + heading: Proptypes.string, + metadata: Proptypes.array, +}; +export default MetaRow; diff --git a/frontend/src/components/templates/ILab/MetricsDropdown.jsx b/frontend/src/components/templates/ILab/MetricsDropdown.jsx new file mode 100644 index 00000000..f301953d --- /dev/null +++ b/frontend/src/components/templates/ILab/MetricsDropdown.jsx @@ -0,0 +1,86 @@ +import { + MenuToggle, + Select, + SelectList, + SelectOption, +} from "@patternfly/react-core"; +import { fetchGraphData, setSelectedMetrics } from "@/actions/ilabActions"; +import { useDispatch, useSelector } from "react-redux"; + +import { cloneDeep } from "lodash"; +import { uid } from "@/utils/helper"; +import { useState } from "react"; + +const MetricsSelect = (props) => { + const { metrics, metrics_selected } = useSelector((state) => state.ilab); + const { item } = props; + /* Metrics select */ + const [isOpen, setIsOpen] = useState(false); + const dispatch = useDispatch(); + // const [selected, setSelected] = useState("Select a value"); + + const toggle1 = (toggleRef, selected) => ( + + {selected} + + ); + + const onToggleClick = () => { + setIsOpen(!isOpen); + }; + const onSelect = (_event, value) => { + console.log("selected", value); + const run = value.split("*"); + //setSelected(run[1].trim()); + dispatch(setSelectedMetrics(run[0].trim(), run[1].trim())); + setIsOpen(false); + dispatch(fetchGraphData(run[0].trim(), run[1].trim(), run[2].trim())); + }; + const metricsDataCopy = cloneDeep(metrics); + + const getMetricsData = (id) => { + const data = metricsDataCopy?.filter((a) => a.uid === id); + return data; + }; + const hasMetricsData = (uuid) => { + const hasData = getMetricsData(uuid).length > 0; + + return hasData; + }; + /* Metrics select */ + return ( + <> + {hasMetricsData(item.id) && ( + + )} + + ); +}; + +export default MetricsSelect; diff --git a/frontend/src/components/templates/ILab/StatusCell.jsx b/frontend/src/components/templates/ILab/StatusCell.jsx new file mode 100644 index 00000000..a4bd208f --- /dev/null +++ b/frontend/src/components/templates/ILab/StatusCell.jsx @@ -0,0 +1,24 @@ +import { + CheckCircleIcon, + ExclamationCircleIcon, +} from "@patternfly/react-icons"; + +import { Label } from "@patternfly/react-core"; +import Proptype from "prop-types"; + +const StatusCell = (props) => { + return props.value?.toLowerCase() === "pass" ? ( + + ) : ( + + ); +}; +StatusCell.propTypes = { + value: Proptype.string, +}; + +export default StatusCell; diff --git a/frontend/src/components/templates/ILab/index.jsx b/frontend/src/components/templates/ILab/index.jsx new file mode 100644 index 00000000..d1eb5d62 --- /dev/null +++ b/frontend/src/components/templates/ILab/index.jsx @@ -0,0 +1,241 @@ +import "./index.less"; + +import { + Accordion, + AccordionContent, + AccordionItem, + AccordionToggle, + Card, + CardBody, +} from "@patternfly/react-core"; +import { + ExpandableRowContent, + Table, + Tbody, + Td, + Th, + Thead, + Tr, +} from "@patternfly/react-table"; +import { + fetchILabJobs, + fetchMetricsInfo, + fetchPeriods, + setIlabDateFilter, +} from "@/actions/ilabActions"; +import { formatDateTime, uid } from "@/utils/helper"; +import { useDispatch, useSelector } from "react-redux"; +import { useEffect, useState } from "react"; +import { useNavigate, useSearchParams } from "react-router-dom"; + +import ILabGraph from "./ILabGraph"; +import MetaRow from "./MetaRow"; +import MetricsSelect from "./MetricsDropdown"; +import RenderPagination from "@/components/organisms/Pagination"; +import StatusCell from "./StatusCell"; +import TableFilter from "@/components/organisms/TableFilters"; + +const ILab = () => { + const dispatch = useDispatch(); + const navigate = useNavigate(); + const [searchParams] = useSearchParams(); + + const { start_date, end_date } = useSelector((state) => state.ilab); + const [expandedResult, setExpandedResult] = useState([]); + const [expanded, setAccExpanded] = useState(["bordered-toggle1"]); + + const onToggle = (id) => { + const index = expanded.indexOf(id); + const newExpanded = + index >= 0 + ? [ + ...expanded.slice(0, index), + ...expanded.slice(index + 1, expanded.length), + ] + : [...expanded, id]; + setAccExpanded(newExpanded); + }; + const isResultExpanded = (res) => expandedResult?.includes(res); + const setExpanded = async (run, isExpanding = true) => { + setExpandedResult((prevExpanded) => { + const otherExpandedRunNames = prevExpanded.filter((r) => r !== run.id); + return isExpanding + ? [...otherExpandedRunNames, run.id] + : otherExpandedRunNames; + }); + if (isExpanding) { + dispatch(fetchPeriods(run.id)); + dispatch(fetchMetricsInfo(run.id)); + } + }; + + const { totalItems, page, perPage, tableData } = useSelector( + (state) => state.ilab + ); + + useEffect(() => { + if (searchParams.size > 0) { + // date filter is set apart + const startDate = searchParams.get("start_date"); + const endDate = searchParams.get("end_date"); + + searchParams.delete("start_date"); + searchParams.delete("end_date"); + const params = Object.fromEntries(searchParams); + const obj = {}; + for (const key in params) { + obj[key] = params[key].split(","); + } + dispatch(setIlabDateFilter(startDate, endDate, navigate)); + } + }, []); + + useEffect(() => { + dispatch(fetchILabJobs()); + }, [dispatch]); + + const columnNames = { + benchmark: "Benchmark", + email: "Email", + name: "Name", + source: "Source", + metric: "Metric", + begin_date: "Start Date", + end_date: "End Date", + status: "Status", + }; + + return ( + <> + + + + + + + + + + + + {tableData.map((item, rowIndex) => ( + <> + + + + + + + + + + + ))} + +
+ {columnNames.metric}{columnNames.begin_date}{columnNames.end_date}{columnNames.status}
+ setExpanded(item, !isResultExpanded(item.id)), + expandId: `expandId-${uid()}`, + }} + /> + + {item.primary_metrics[0]}{formatDateTime(item.begin_date)}{formatDateTime(item.end_date)} + +
+ + + + { + onToggle("bordered-toggle1"); + }} + isExpanded={expanded.includes("bordered-toggle1")} + id="bordered-toggle1" + > + Metadata + + + +
+ + + + + + + + + + + + + + + +
+
+
+ + { + onToggle("bordered-toggle2"); + }} + isExpanded={expanded.includes("bordered-toggle2")} + id="bordered-toggle2" + > + Metrics & Graph + + + Metrics: +
+ +
+
+
+
+
+
+ + + ); +}; + +export default ILab; diff --git a/frontend/src/components/templates/ILab/index.less b/frontend/src/components/templates/ILab/index.less new file mode 100644 index 00000000..02cd02cb --- /dev/null +++ b/frontend/src/components/templates/ILab/index.less @@ -0,0 +1,13 @@ +.pf-v5-c-accordion__expandable-content-body { + display: block; +} +.metadata-wrapper { + display: flex; + flex-direction: row; + margin-bottom: 1vw; + .metadata-card { + flex: 1; /* additionally, equal width */ + padding: 1em; + margin-right: 1.5vw; + } +} diff --git a/frontend/src/reducers/ilabReducer.js b/frontend/src/reducers/ilabReducer.js new file mode 100644 index 00000000..2f1bcd91 --- /dev/null +++ b/frontend/src/reducers/ilabReducer.js @@ -0,0 +1,61 @@ +import * as TYPES from "@/actions/types"; + +const initialState = { + results: [], + start_date: "", + end_date: "", + graphData: [], + totalItems: 0, + page: 1, + perPage: 10, + size: 10, + offset: 0, + metrics: [], + periods: [], + metrics_selected: {}, + tableData: [], +}; +const ILabReducer = (state = initialState, action = {}) => { + const { type, payload } = action; + switch (type) { + case TYPES.SET_ILAB_JOBS_DATA: + return { + ...state, + results: payload, + }; + case TYPES.SET_ILAB_DATE_FILTER: + return { + ...state, + start_date: payload.start_date, + end_date: payload.end_date, + }; + case TYPES.SET_ILAB_TOTAL_ITEMS: + return { + ...state, + totalItems: payload, + }; + case TYPES.SET_ILAB_OFFSET: + return { ...state, offset: payload }; + case TYPES.SET_ILAB_PAGE: + return { ...state, page: payload }; + case TYPES.SET_ILAB_PAGE_OPTIONS: + return { ...state, page: payload.page, perPage: payload.perPage }; + case TYPES.SET_ILAB_METRICS: + return { ...state, metrics: [...state.metrics, payload] }; + case TYPES.SET_ILAB_PERIODS: + return { ...state, periods: [...state.periods, payload] }; + case TYPES.SET_ILAB_SELECTED_METRICS: + return { + ...state, + metrics_selected: payload, + }; + case TYPES.SET_ILAB_GRAPH_DATA: + return { ...state, graphData: payload }; + case TYPES.SET_ILAB_INIT_JOBS: + return { ...state, tableData: payload }; + default: + return state; + } +}; + +export default ILabReducer; diff --git a/frontend/src/reducers/index.js b/frontend/src/reducers/index.js index 1fb4c555..43970170 100644 --- a/frontend/src/reducers/index.js +++ b/frontend/src/reducers/index.js @@ -1,4 +1,5 @@ import HomeReducer from "./homeReducer"; +import ILabReducer from "./ilabReducer"; import LoadingReducer from "./loadingReducer"; import OCPReducer from "./ocpReducer"; import QuayReducer from "./quayReducer"; @@ -15,4 +16,5 @@ export default combineReducers({ ocp: OCPReducer, quay: QuayReducer, telco: TelcoReducer, + ilab: ILabReducer, }); diff --git a/frontend/src/reducers/loadingReducer.js b/frontend/src/reducers/loadingReducer.js index 496a4e65..52f0c732 100644 --- a/frontend/src/reducers/loadingReducer.js +++ b/frontend/src/reducers/loadingReducer.js @@ -7,7 +7,7 @@ import { const initialState = { isLoading: false, - isGraphLoading: true, + isGraphLoading: false, }; const LoadingReducer = (state = initialState, action = {}) => { diff --git a/frontend/src/store/reducers/InitialData.js b/frontend/src/store/reducers/InitialData.js deleted file mode 100644 index 80503b3c..00000000 --- a/frontend/src/store/reducers/InitialData.js +++ /dev/null @@ -1,181 +0,0 @@ - -export const OCP_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - others: 0, - duration:0, - benchmarks: ["All"], - versions: ["All"], - workers: ["All"], - ciSystems: ["All"], - networkTypes: ["All"], - jobTypes: ["All"], - rehearses: ["All"], - allIpsec: ["All"], - allFips: ["All"], - allEncrypted: ["All"], - encryptionTypes: ["All"], - allPublish: ["All"], - computeArchs: ["All"], - controlPlaneArchs: ["All"], - jobStatuses: ["All"], - selectedBenchmark: "All", - selectedVersion: "All", - selectedPlatform: "All", - selectedWorkerCount: "All", - selectedNetworkType: "All", - selectedCiSystem: "All", - selectedJobType: "All", - selectedRehearse: "All", - selectedIpsec: "All", - selectedFips: "All", - selectedEncrypted: "All", - selectedEncryptionType: "All", - selectedPublish: "All", - selectedComputeArch: "All", - selectedControlPlaneArch: "All", - selectedJobStatus: "All", - waitForUpdate: false, - platforms: ["All"], - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{ name: "Benchmark", value: "benchmark" }, - {name:"Release Stream", value: "releaseStream"}, - {name:"Build", value: "build"}, - {name: "Worker Count", value: "workerNodesCount"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Status", value: "jobStatus"}], -} - -export const QUAY_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - others: 0, - duration:0, - ciSystems: ["All"], - platforms: ["All"], - benchmarks: ["All"], - releaseStreams: ["All"], - workers: ["All"], - hitSizes: ["All"], - concurrencies: ["All"], - imagePushPulls: ["All"], - selectedCiSystem: "All", - selectedPlatform: "All", - selectedBenchmark: "All", - selectedReleaseStream: "All", - selectedWorkerCount: "All", - selectedHitSize: "All", - selectedConcurrency: "All", - selectedImagePushPulls: "All", - waitForUpdate: false, - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{ name: "Benchmark", value: "benchmark" }, - {name:"Release Stream", value: "releaseStream"}, - {name:"Platform", value: "platform"}, - {name: "Worker Count", value: "workerNodesCount"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Status", value: "jobStatus"}], -} - -export const TELCO_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - others: 0, - duration:0, - ciSystems: ["All"], - benchmarks: ["All"], - versions: ["All"], - releaseStreams: ["All"], - formals: ["All"], - nodeNames: ["All"], - cpus: ["All"], - selectedCiSystem: "All", - selectedBenchmark: "All", - selectedVersion: "All", - selectedReleaseStream: "All", - selectedFormal: "All", - selectedCpu: "All", - selectedNodeName: "All", - waitForUpdate: false, - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{ name: "Benchmark", value: "benchmark" }, - {name:"Release Stream", value: "releaseStream"}, - {name:"Build", value: "ocpVersion"}, - {name:"CPU", value: "cpu"}, - {name:"Node Name", value: "nodeName"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Status", value: "jobStatus"}], -} - -export const CPT_INITIAL_DATA = { - initialState: true, - success: 0, - failure: 0, - total: 0, - others: 0, - testNames: ["All"], - products: ["All"], - ciSystems: ["All"], - statuses: ["All"], - releaseStreams: ["All"], - selectedCiSystem: "All", - selectedProduct: "All", - selectedTestName: "All", - selectedJobStatus: "All", - selectedReleaseStream: "All", - waitForUpdate: false, - copyData: [], - data: [], - updatedTime: 'Loading', - error: null, - startDate: '', - endDate: '', - tableData : [{name:"Product", value: "product"}, - { name: "CI System", value: "ciSystem" }, - {name: "Test Name", value: "testName"}, - {name: "Version", value: "version"}, - {name: "Release Stream", value: "releaseStream"}, - {name: "Start Date", value: "startDate"}, - {name: "End Date", value: "endDate"}, - {name: "Build URL", value: "buildUrl"}, - {name: "Status", value: "jobStatus"},], -} - -export const GRAPH_INITIAL_DATA = { - uuid_results: {}, - graphError: false, -} - -export const QUAY_GRAPH_INITIAL_DATA = { - uuid_results: {}, - graphError: false, -} - -export const TELCO_GRAPH_INITIAL_DATA = { - uuid_results: {}, - graphError: false, -} diff --git a/frontend/src/store/reducers/index.js b/frontend/src/store/reducers/index.js deleted file mode 100644 index fe4fddad..00000000 --- a/frontend/src/store/reducers/index.js +++ /dev/null @@ -1,18 +0,0 @@ -import ocpJobsReducer from "./OCPJobsReducer"; -import cptJobsReducer from "./CPTJobsReducer"; -import quayJobsReducer from "./QuayJobsReducer"; -import telcoJobsReducer from "./TelcoJobsReducer"; -import graphReducer from "./GraphReducer"; -import quayGraphReducer from "./QuayGraphReducer"; -import telcoGraphReducer from "./TelcoGraphReducer"; - - -export const rootReducer = { - 'ocpJobs': ocpJobsReducer, - 'cptJobs': cptJobsReducer, - 'quayJobs': quayJobsReducer, - 'telcoJobs': telcoJobsReducer, - 'graph': graphReducer, - 'quayGraph': quayGraphReducer, - 'telcoGraph': telcoGraphReducer, -} diff --git a/frontend/src/utils/apiConstants.js b/frontend/src/utils/apiConstants.js index 52576b4a..33fe0ccd 100644 --- a/frontend/src/utils/apiConstants.js +++ b/frontend/src/utils/apiConstants.js @@ -1,7 +1,7 @@ export const getUrl = () => { const { hostname, protocol } = window.location; return hostname === "localhost" - ? "http://localhost:8000" + ? "http://0.0.0.0:8000" : `${protocol}//${hostname}`; }; @@ -17,3 +17,6 @@ export const QUAY_GRAPH_API_V1 = "/api/v1/quay/graph"; export const TELCO_JOBS_API_V1 = "/api/v1/telco/jobs"; export const TELCO_GRAPH_API_V1 = "/api/v1/telco/graph"; + +export const ILABS_JOBS_API_V1 = "/api/v1/ilab/runs"; +export const ILAB_GRAPH_API_V1 = "/api/v1/ilab/runs/"; diff --git a/frontend/src/utils/routeConstants.js b/frontend/src/utils/routeConstants.js index 53f271fa..c46bab55 100644 --- a/frontend/src/utils/routeConstants.js +++ b/frontend/src/utils/routeConstants.js @@ -2,3 +2,4 @@ export const HOME = "Home"; export const OCP = "OCP"; export const QUAY = "QUAY"; export const TELCO = "TELCO"; +export const ILAB = "ILAB"; From 7215bb297956a8e08dc37600eb1d398f023a7415 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 18 Oct 2024 13:06:58 -0400 Subject: [PATCH 4/7] Support graphing multiple run comparisons When graphing metrics from two runs, the timestamps rarely align; so we add a `relative` option to convert the absolute metric timestamps into relative delta seconds from each run's start. --- backend/app/services/crucible_svc.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/backend/app/services/crucible_svc.py b/backend/app/services/crucible_svc.py index 7ae9ac12..fd0d310e 100644 --- a/backend/app/services/crucible_svc.py +++ b/backend/app/services/crucible_svc.py @@ -63,14 +63,21 @@ class GraphList(BaseModel): omitted if all Graph objects specify a run ID. (This is most useful to select a set of graphs all for a single run ID.) + Normally the X axis will be the actual sample timestamp values; if you + specify relative=True, the X axis will be the duration from the first + timestamp of the metric series, in seconds. This allows graphs of similar + runs started at different times to be overlaid. + Fields: run: Specify the (default) run ID name: Specify a name for the set of graphs + relative: True for relative timescale in seconds graphs: a list of Graph objects """ run: Optional[str] = None name: str + relative: bool = False graphs: list[Graph] @@ -1180,6 +1187,7 @@ async def get_runs( "params": iparams, } ) + run["iterations"].sort(key=lambda i: i["iteration"]) run["params"] = common.render() try: run["begin_date"] = self._format_timestamp(run["begin"]) @@ -1926,10 +1934,19 @@ async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: x = [] y = [] + first = None + for p in sorted(points, key=lambda a: a.begin): - x.extend( - [self._format_timestamp(p.begin), self._format_timestamp(p.end)] - ) + if graphdata.relative: + if not first: + first = p.begin + s = (p.begin - first) / 1000.0 + e = (p.end - first) / 1000.0 + x.extend([s, e]) + else: + x.extend( + [self._format_timestamp(p.begin), self._format_timestamp(p.end)] + ) y.extend([p.value, p.value]) y_max = max(y_max, p.value) From 1fb02a0009a3948ee2704c494f4c4cd45d3815f8 Mon Sep 17 00:00:00 2001 From: MVarshini Date: Thu, 24 Oct 2024 16:55:13 -0400 Subject: [PATCH 5/7] Multi-run comparison UI This adds the basic UI to support comparison of the metrics of two InstructLab runs. This compares only the primary metrics of the two runs, in a relative timeline graph. This is backed by #125, which is backed by #124, which is backed by #123, which is backed by #122. These represent a series of steps towards a complete InstructLab UI and API, and will be reviewed and merged from #122 forward. --- frontend/src/actions/filterActions.js | 2 +- frontend/src/actions/ilabActions.js | 112 ++++++++- frontend/src/actions/types.js | 3 + .../organisms/TableFilters/index.jsx | 19 +- .../organisms/TableFilters/index.less | 4 + .../templates/ILab/IlabCompareComponent.jsx | 117 ++++++++++ .../templates/ILab/IlabExpandedRow.jsx | 148 ++++++++++++ .../templates/ILab/MetricsDropdown.jsx | 13 +- .../src/components/templates/ILab/index.jsx | 213 ++++++------------ .../src/components/templates/ILab/index.less | 30 ++- frontend/src/reducers/ilabReducer.js | 9 + 11 files changed, 512 insertions(+), 158 deletions(-) create mode 100644 frontend/src/components/templates/ILab/IlabCompareComponent.jsx create mode 100644 frontend/src/components/templates/ILab/IlabExpandedRow.jsx diff --git a/frontend/src/actions/filterActions.js b/frontend/src/actions/filterActions.js index f8fa5691..6385b0bc 100644 --- a/frontend/src/actions/filterActions.js +++ b/frontend/src/actions/filterActions.js @@ -79,7 +79,7 @@ export const setDateFilter = (date, key, navigation, currType) => { dispatch(setTelcoDateFilter(date, key, navigation)); } else if (currType === "ilab") { dispatch(setIlabDateFilter(date, key, navigation)); - dispatch(fetchILabJobs()); + dispatch(fetchILabJobs(true)); } }; diff --git a/frontend/src/actions/ilabActions.js b/frontend/src/actions/ilabActions.js index 1152846b..c3782735 100644 --- a/frontend/src/actions/ilabActions.js +++ b/frontend/src/actions/ilabActions.js @@ -122,7 +122,8 @@ export const fetchPeriods = (uid) => async (dispatch) => { }; export const fetchGraphData = - (uid, metric, primary_metric) => async (dispatch, getState) => { + (uid, metric = null) => + async (dispatch, getState) => { try { const periods = getState().ilab.periods.find((i) => i.uid == uid); const graphData = cloneDeep(getState().ilab.graphData); @@ -136,15 +137,17 @@ export const fetchGraphData = let graphs = []; periods?.periods?.forEach((p) => { graphs.push({ metric: p.primary_metric, periods: [p.id] }); - graphs.push({ - metric, - aggregate: true, - periods: [p.id], - }); + if (metric) { + graphs.push({ + metric, + aggregate: true, + periods: [p.id], + }); + } }); const response = await API.post(`/api/v1/ilab/runs/multigraph`, { run: uid, - name: primary_metric, + name: `graph ${uid}`, graphs, }); if (response.status === 200) { @@ -169,6 +172,86 @@ export const fetchGraphData = dispatch({ type: TYPES.GRAPH_COMPLETED }); }; +export const handleMultiGraph = (uids) => async (dispatch, getState) => { + try { + const periods = getState().ilab.periods; + const pUids = periods.map((i) => i.uid); + + const missingPeriods = uids.filter(function (x) { + return pUids.indexOf(x) < 0; + }); + + await Promise.all( + missingPeriods.map(async (uid) => { + await dispatch(fetchPeriods(uid)); // Dispatch each item + }) + ); + + dispatch(fetchMultiGraphData(uids)); + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } +}; +export const fetchMultiGraphData = (uids) => async (dispatch, getState) => { + try { + dispatch({ type: TYPES.LOADING }); + const periods = getState().ilab.periods; + const filterPeriods = periods.filter((item) => uids.includes(item.uid)); + + let graphs = []; + uids.forEach(async (uid) => { + const periods = filterPeriods.find((i) => i.uid == uid); + periods?.periods?.forEach((p) => { + graphs.push({ + run: uid, + metric: p.primary_metric, + periods: [p.id], + }); + // graphs.push({ + // run: uid, + // metric, + // aggregate: true, + // periods: [p.id], + // }); + }); + }); + console.log(graphs); + const response = await API.post(`/api/v1/ilab/runs/multigraph`, { + name: "comparison", + relative: true, + graphs, + }); + if (response.status === 200) { + response.data.layout["showlegend"] = true; + response.data.layout["responsive"] = "true"; + response.data.layout["autosize"] = "true"; + response.data.layout["legend"] = { x: 0, y: 1.5 }; + const graphData = []; + graphData.push({ + data: response.data.data, + layout: response.data.layout, + }); + dispatch({ + type: TYPES.SET_ILAB_MULTIGRAPH_DATA, + payload: graphData, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.COMPLETED }); +}; + export const setIlabPage = (pageNo) => ({ type: TYPES.SET_ILAB_PAGE, payload: pageNo, @@ -210,4 +293,19 @@ export const tableReCalcValues = () => (dispatch, getState) => { const startIdx = page !== 1 ? (page - 1) * perPage : 0; const endIdx = page !== 1 ? page * perPage - 1 : perPage; dispatch(sliceIlabTableRows(startIdx, endIdx)); + dispatch(getMetaRowdId()); +}; + +export const getMetaRowdId = () => (dispatch, getState) => { + const tableData = getState().ilab.tableData; + const metaId = tableData.map((item) => `metadata-toggle-${item.id}`); + dispatch(setMetaRowExpanded(metaId)); }; +export const toggleComparisonSwitch = () => ({ + type: TYPES.TOGGLE_COMPARISON_SWITCH, +}); + +export const setMetaRowExpanded = (expandedItems) => ({ + type: TYPES.SET_EXPANDED_METAROW, + payload: expandedItems, +}); diff --git a/frontend/src/actions/types.js b/frontend/src/actions/types.js index 58d7506f..f7e21fec 100644 --- a/frontend/src/actions/types.js +++ b/frontend/src/actions/types.js @@ -81,6 +81,7 @@ export const SET_TELCO_GRAPH_DATA = "SET_TELCO_GRAPH_DATA"; export const SET_ILAB_JOBS_DATA = "SET_ILAB_JOBS_DATA"; export const SET_ILAB_DATE_FILTER = "SET_ILAB_DATE_FILTER"; export const SET_ILAB_GRAPH_DATA = "SET_ILAB_GRAPH_DATA"; +export const SET_ILAB_MULTIGRAPH_DATA = "SET_ILAB_MULTIGRAPH_DATA"; export const SET_ILAB_TOTAL_ITEMS = "SET_ILAB_TOTAL_ITEMS"; export const SET_ILAB_OFFSET = "SET_ILAB_OFFSET"; export const SET_ILAB_PAGE = "SET_ILAB_PAGE"; @@ -89,3 +90,5 @@ export const SET_ILAB_METRICS = "SET_ILAB_METRICS"; export const SET_ILAB_SELECTED_METRICS = "SET_ILAB_SELECTED_METRICS"; export const SET_ILAB_PERIODS = "SET_ILAB_PERIODS"; export const SET_ILAB_INIT_JOBS = "SET_ILAB_INIT_JOBS"; +export const TOGGLE_COMPARISON_SWITCH = "TOGGLE_COMPARISON_SWITCH"; +export const SET_EXPANDED_METAROW = "SET_EXPANDED_METAROW"; diff --git a/frontend/src/components/organisms/TableFilters/index.jsx b/frontend/src/components/organisms/TableFilters/index.jsx index 0dd5885d..cec3fee0 100644 --- a/frontend/src/components/organisms/TableFilters/index.jsx +++ b/frontend/src/components/organisms/TableFilters/index.jsx @@ -5,6 +5,7 @@ import "./index.less"; import { Chip, ChipGroup, + Switch, Toolbar, ToolbarContent, ToolbarItem, @@ -39,6 +40,8 @@ const TableFilter = (props) => { setColumns, selectedFilters, updateSelectedFilter, + onSwitchChange, + isSwitchChecked, } = props; const category = @@ -66,7 +69,7 @@ const TableFilter = (props) => { setDateFilter(date, key, navigation, type); }; const endDateChangeHandler = (date, key) => { - setDateFilter(key, date, navigation, type); + setDateFilter(date, key, navigation, type); }; return ( @@ -123,6 +126,18 @@ const TableFilter = (props) => { )} + {type === "ilab" && ( + + + + + + )} {appliedFilters && Object.keys(appliedFilters).length > 0 && @@ -154,5 +169,7 @@ TableFilter.propTypes = { selectedFilters: PropTypes.array, updateSelectedFilter: PropTypes.func, navigation: PropTypes.func, + isSwitchChecked: PropTypes.bool, + onSwitchChange: PropTypes.func, }; export default TableFilter; diff --git a/frontend/src/components/organisms/TableFilters/index.less b/frontend/src/components/organisms/TableFilters/index.less index b100a012..1a479703 100644 --- a/frontend/src/components/organisms/TableFilters/index.less +++ b/frontend/src/components/organisms/TableFilters/index.less @@ -11,4 +11,8 @@ .to-text { padding: 5px 0; } + #comparison-switch { + margin-left: auto; + align-content: center; + } } \ No newline at end of file diff --git a/frontend/src/components/templates/ILab/IlabCompareComponent.jsx b/frontend/src/components/templates/ILab/IlabCompareComponent.jsx new file mode 100644 index 00000000..c062be13 --- /dev/null +++ b/frontend/src/components/templates/ILab/IlabCompareComponent.jsx @@ -0,0 +1,117 @@ +import "./index.less"; + +import { + Button, + Menu, + MenuContent, + MenuItem, + MenuItemAction, + MenuList, + Title, +} from "@patternfly/react-core"; +import { useDispatch, useSelector } from "react-redux"; + +import { InfoCircleIcon } from "@patternfly/react-icons"; +import Plot from "react-plotly.js"; +import PropTypes from "prop-types"; +import RenderPagination from "@/components/organisms/Pagination"; +import { cloneDeep } from "lodash"; +import { handleMultiGraph } from "@/actions/ilabActions.js"; +import { uid } from "@/utils/helper"; +import { useState } from "react"; + +const IlabCompareComponent = () => { + // const { data } = props; + const { page, perPage, totalItems, tableData } = useSelector( + (state) => state.ilab + ); + const dispatch = useDispatch(); + const [selectedItems, setSelectedItems] = useState([]); + const { multiGraphData } = useSelector((state) => state.ilab); + const isGraphLoading = useSelector((state) => state.loading.isGraphLoading); + const graphDataCopy = cloneDeep(multiGraphData); + + const onSelect = (_event, itemId) => { + const item = itemId; + if (selectedItems.includes(item)) { + setSelectedItems(selectedItems.filter((id) => id !== item)); + } else { + setSelectedItems([...selectedItems, item]); + } + }; + const dummy = () => { + dispatch(handleMultiGraph(selectedItems)); + }; + return ( +
+
+ + Metrics + + + + + + {tableData.map((item) => { + return ( + } + actionId="code" + onClick={() => console.log("clicked on code icon")} + aria-label="Code" + /> + } + > + {`${new Date(item.begin_date).toLocaleDateString()} ${ + item.primary_metrics[0] + }`} + + ); + })} + + + + +
+
+ {isGraphLoading ? ( +
+ ) : graphDataCopy?.length > 0 && + graphDataCopy?.[0]?.data?.length > 0 ? ( +
+ +
+ ) : ( +
No data to compare
+ )} +
+
+ ); +}; + +IlabCompareComponent.propTypes = { + data: PropTypes.array, +}; +export default IlabCompareComponent; diff --git a/frontend/src/components/templates/ILab/IlabExpandedRow.jsx b/frontend/src/components/templates/ILab/IlabExpandedRow.jsx new file mode 100644 index 00000000..bcdbcc33 --- /dev/null +++ b/frontend/src/components/templates/ILab/IlabExpandedRow.jsx @@ -0,0 +1,148 @@ +import { + Accordion, + AccordionContent, + AccordionItem, + AccordionToggle, + Card, + CardBody, +} from "@patternfly/react-core"; +import { useDispatch, useSelector } from "react-redux"; + +import ILabGraph from "./ILabGraph"; +import MetaRow from "./MetaRow"; +import MetricsSelect from "./MetricsDropdown"; +import PropTypes from "prop-types"; +import { setMetaRowExpanded } from "@/actions/ilabActions"; +import { uid } from "@/utils/helper"; + +const IlabRowContent = (props) => { + const { item } = props; + const dispatch = useDispatch(); + const { metaRowExpanded } = useSelector((state) => state.ilab); + + const onToggle = (id) => { + const index = metaRowExpanded.indexOf(id); + const newExpanded = + index >= 0 + ? [ + ...metaRowExpanded.slice(0, index), + ...metaRowExpanded.slice(index + 1, metaRowExpanded.length), + ] + : [...metaRowExpanded, id]; + + dispatch(setMetaRowExpanded(newExpanded)); + }; + return ( + + + { + onToggle(`metadata-toggle-${item.id}`); + }} + isExpanded={metaRowExpanded.includes(`metadata-toggle-${item.id}`)} + id={`metadata-toggle-${item.id}`} + > + Metadata + + + +
+ + + + + + + + + + + + + + {item.iterations.length > 1 && ( + + { + onToggle(`iterations-toggle-${item.id}`); + }} + isExpanded={metaRowExpanded.includes( + `iterations-toggle-${item.id}` + )} + id={`iterations-toggle-${item.id}`} + > + {`Unique parameters for ${item.iterations.length} Iterations`} + + + {item.iterations.map((i) => ( + !(i[0] in item.params) + )} + /> + ))} + + + )} + + +
+
+
+ + { + onToggle(`graph-toggle-${item.id}`); + }} + isExpanded={metaRowExpanded.includes(`graph-toggle-${item.id}`)} + id={`graph-toggle-${item.id}`} + > + Metrics & Graph + + +
Metrics:
+ +
+ +
+
+
+
+ ); +}; +IlabRowContent.propTypes = { + item: PropTypes.object, +}; +export default IlabRowContent; diff --git a/frontend/src/components/templates/ILab/MetricsDropdown.jsx b/frontend/src/components/templates/ILab/MetricsDropdown.jsx index f301953d..04568f0f 100644 --- a/frontend/src/components/templates/ILab/MetricsDropdown.jsx +++ b/frontend/src/components/templates/ILab/MetricsDropdown.jsx @@ -3,10 +3,12 @@ import { Select, SelectList, SelectOption, + Skeleton } from "@patternfly/react-core"; import { fetchGraphData, setSelectedMetrics } from "@/actions/ilabActions"; import { useDispatch, useSelector } from "react-redux"; +import PropTypes from "prop-types"; import { cloneDeep } from "lodash"; import { uid } from "@/utils/helper"; import { useState } from "react"; @@ -41,7 +43,7 @@ const MetricsSelect = (props) => { //setSelected(run[1].trim()); dispatch(setSelectedMetrics(run[0].trim(), run[1].trim())); setIsOpen(false); - dispatch(fetchGraphData(run[0].trim(), run[1].trim(), run[2].trim())); + dispatch(fetchGraphData(run[0].trim(), run[1].trim())); }; const metricsDataCopy = cloneDeep(metrics); @@ -57,7 +59,7 @@ const MetricsSelect = (props) => { /* Metrics select */ return ( <> - {hasMetricsData(item.id) && ( + {hasMetricsData(item.id) ? ( - )} + ): + + } ); }; +MetricsSelect.propTypes = { + item: PropTypes.object, +}; export default MetricsSelect; diff --git a/frontend/src/components/templates/ILab/index.jsx b/frontend/src/components/templates/ILab/index.jsx index d1eb5d62..d728b44a 100644 --- a/frontend/src/components/templates/ILab/index.jsx +++ b/frontend/src/components/templates/ILab/index.jsx @@ -1,13 +1,5 @@ import "./index.less"; -import { - Accordion, - AccordionContent, - AccordionItem, - AccordionToggle, - Card, - CardBody, -} from "@patternfly/react-core"; import { ExpandableRowContent, Table, @@ -22,15 +14,15 @@ import { fetchMetricsInfo, fetchPeriods, setIlabDateFilter, + toggleComparisonSwitch, } from "@/actions/ilabActions"; import { formatDateTime, uid } from "@/utils/helper"; import { useDispatch, useSelector } from "react-redux"; import { useEffect, useState } from "react"; import { useNavigate, useSearchParams } from "react-router-dom"; -import ILabGraph from "./ILabGraph"; -import MetaRow from "./MetaRow"; -import MetricsSelect from "./MetricsDropdown"; +import IlabCompareComponent from "./IlabCompareComponent"; +import IlabRowContent from "./IlabExpandedRow"; import RenderPagination from "@/components/organisms/Pagination"; import StatusCell from "./StatusCell"; import TableFilter from "@/components/organisms/TableFilters"; @@ -40,21 +32,17 @@ const ILab = () => { const navigate = useNavigate(); const [searchParams] = useSearchParams(); - const { start_date, end_date } = useSelector((state) => state.ilab); + const { + start_date, + end_date, + comparisonSwitch, + tableData, + page, + perPage, + totalItems, + } = useSelector((state) => state.ilab); const [expandedResult, setExpandedResult] = useState([]); - const [expanded, setAccExpanded] = useState(["bordered-toggle1"]); - const onToggle = (id) => { - const index = expanded.indexOf(id); - const newExpanded = - index >= 0 - ? [ - ...expanded.slice(0, index), - ...expanded.slice(index + 1, expanded.length), - ] - : [...expanded, id]; - setAccExpanded(newExpanded); - }; const isResultExpanded = (res) => expandedResult?.includes(res); const setExpanded = async (run, isExpanding = true) => { setExpandedResult((prevExpanded) => { @@ -69,10 +57,6 @@ const ILab = () => { } }; - const { totalItems, page, perPage, tableData } = useSelector( - (state) => state.ilab - ); - useEffect(() => { if (searchParams.size > 0) { // date filter is set apart @@ -105,6 +89,9 @@ const ILab = () => { status: "Status", }; + const onSwitchChange = () => { + dispatch(toggleComparisonSwitch()); + }; return ( <> { type={"ilab"} showColumnMenu={false} navigation={navigate} + isSwitchChecked={comparisonSwitch} + onSwitchChange={onSwitchChange} /> - - - - - - - - - - - {tableData.map((item, rowIndex) => ( - <> + {comparisonSwitch ? ( + + ) : ( + <> +
- {columnNames.metric}{columnNames.begin_date}{columnNames.end_date}{columnNames.status}
+ - - - - + + + + - - + + {tableData.map((item, rowIndex) => ( + <> + + - - - ))} - -
- setExpanded(item, !isResultExpanded(item.id)), - expandId: `expandId-${uid()}`, - }} - /> - - {item.primary_metrics[0]}{formatDateTime(item.begin_date)}{formatDateTime(item.end_date)} - - + {columnNames.metric}{columnNames.begin_date}{columnNames.end_date}{columnNames.status}
- - - - { - onToggle("bordered-toggle1"); - }} - isExpanded={expanded.includes("bordered-toggle1")} - id="bordered-toggle1" - > - Metadata - +
+ setExpanded(item, !isResultExpanded(item.id)), + expandId: `expandId-${uid()}`, + }} + /> - -
- - - - - - - - - - - - - - - -
-
- - - { - onToggle("bordered-toggle2"); - }} - isExpanded={expanded.includes("bordered-toggle2")} - id="bordered-toggle2" - > - Metrics & Graph - - - Metrics: -
- -
-
-
- - -
- + {item.primary_metrics[0]} + {formatDateTime(item.begin_date)} + {formatDateTime(item.end_date)} + + + + + + + + + + + + + ))} + + + + + )} ); }; diff --git a/frontend/src/components/templates/ILab/index.less b/frontend/src/components/templates/ILab/index.less index 02cd02cb..399c6c77 100644 --- a/frontend/src/components/templates/ILab/index.less +++ b/frontend/src/components/templates/ILab/index.less @@ -6,8 +6,36 @@ flex-direction: row; margin-bottom: 1vw; .metadata-card { - flex: 1; /* additionally, equal width */ + flex: 1; /* additionally, equal width */ padding: 1em; margin-right: 1.5vw; } } +.comparison-container { + display: flex; + width: 100%; + height: 80%; + .metrics-container { + width: 40%; + padding: 10px; + .compare-btn { + margin: 2vh 0; + } + .pf-v5-c-menu { + height: 75%; + box-shadow: unset; + } + } + .chart-container { + width: 80%; + .js-plotly-plot { + width: 100%; + height: 100%; + overflow-x: auto; + overflow-y: visible; + } + } + .title { + margin-bottom: 2vh; + } +} diff --git a/frontend/src/reducers/ilabReducer.js b/frontend/src/reducers/ilabReducer.js index 2f1bcd91..f8cd5b0d 100644 --- a/frontend/src/reducers/ilabReducer.js +++ b/frontend/src/reducers/ilabReducer.js @@ -5,6 +5,7 @@ const initialState = { start_date: "", end_date: "", graphData: [], + multiGraphData: [], totalItems: 0, page: 1, perPage: 10, @@ -14,6 +15,8 @@ const initialState = { periods: [], metrics_selected: {}, tableData: [], + comparisonSwitch: false, + metaRowExpanded: [], }; const ILabReducer = (state = initialState, action = {}) => { const { type, payload } = action; @@ -53,6 +56,12 @@ const ILabReducer = (state = initialState, action = {}) => { return { ...state, graphData: payload }; case TYPES.SET_ILAB_INIT_JOBS: return { ...state, tableData: payload }; + case TYPES.SET_ILAB_MULTIGRAPH_DATA: + return { ...state, multiGraphData: payload }; + case TYPES.TOGGLE_COMPARISON_SWITCH: + return { ...state, comparisonSwitch: !state.comparisonSwitch }; + case TYPES.SET_EXPANDED_METAROW: + return { ...state, metaRowExpanded: payload }; default: return state; } From bea1372e1195ae58b18950f391264ca5a6ba027c Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Wed, 6 Nov 2024 15:54:11 -0500 Subject: [PATCH 6/7] Crucible statistics comparison This PR is primarily CPT dashboard backend API (and Crucible service) changes to support pulling and displaying multiple Crucible metric statistics. Only minor UI changes are included to support API changes. The remaining UI changes to pull and display statistics will be pushed separately. --- backend/app/api/v1/endpoints/ilab/ilab.py | 173 +++++++--- backend/app/services/crucible_svc.py | 396 ++++++++++++---------- frontend/src/actions/ilabActions.js | 33 +- publish-containers.sh | 39 +++ 4 files changed, 411 insertions(+), 230 deletions(-) create mode 100755 publish-containers.sh diff --git a/backend/app/api/v1/endpoints/ilab/ilab.py b/backend/app/api/v1/endpoints/ilab/ilab.py index a5b4b825..b30f0e6a 100644 --- a/backend/app/api/v1/endpoints/ilab/ilab.py +++ b/backend/app/api/v1/endpoints/ilab/ilab.py @@ -10,7 +10,7 @@ from fastapi import APIRouter, Depends, Query -from app.services.crucible_svc import CrucibleService, Graph, GraphList +from app.services.crucible_svc import CrucibleService, GraphList, Metric router = APIRouter() @@ -250,7 +250,7 @@ async def params(crucible: Annotated[CrucibleService, Depends(crucible_svc)], ru ) async def iterations( crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str -): +) -> list[dict[str, Any]]: return await crucible.get_iterations(run) @@ -284,7 +284,7 @@ async def iterations( ) async def run_samples( crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str -): +) -> list[dict[str, Any]]: return await crucible.get_samples(run) @@ -296,23 +296,34 @@ async def run_samples( 200: example_response( [ { - "begin": "2024-09-12 17:40:27.982000+00:00", - "end": "2024-09-12 18:03:23.132000+00:00", - "id": "6BA57EF2-7139-11EF-A80B-E5037504B9B1", + "begin": "2024-10-29 14:30:56.723000+00:00", + "end": "2024-10-29 14:35:18.939000+00:00", + "id": "DDBF3584-9603-11EF-8B3C-BAA807DC31B7", "name": "measurement", "iteration": 1, "sample": "1", - "primary_metric": "ilab::sdg-samples-sec", + "is_primary": True, + "primary_metric": "ilab::actual-train-seconds", "status": "pass", }, { - "begin": "2024-09-12 18:05:03.229000+00:00", - "end": "2024-09-12 18:27:55.419000+00:00", - "id": "6BB93622-7139-11EF-A6C0-89A48E630F9D", - "name": "measurement", - "iteration": 4, + "begin": "1970-01-01 00:00:00+00:00", + "end": "1970-01-01 00:00:00+00:00", + "id": "DDB7FC92-9603-11EF-8FD6-97CFCD234564", + "name": "phase1", + "iteration": 1, "sample": "1", - "primary_metric": "ilab::sdg-samples-sec", + "is_primary": False, + "status": "pass", + }, + { + "begin": "1970-01-01 00:00:00+00:00", + "end": "1970-01-01 00:00:00+00:00", + "id": "DDBBB5B2-9603-11EF-A19F-824975057E5B", + "name": "phase2", + "iteration": 1, + "sample": "1", + "is_primary": False, "status": "pass", }, ] @@ -322,7 +333,7 @@ async def run_samples( ) async def run_periods( crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str -): +) -> list[dict[str, Any]]: return await crucible.get_periods(run) @@ -334,21 +345,14 @@ async def run_periods( 200: example_response( [ { - "id": "6BBE6872-7139-11EF-BFAA-8569A9399D61", + "id": "DDB759EA-9603-11EF-A714-9033EEFCCE93", "num": "1", "path": None, "status": "pass", - "iteration": 5, - "primary_metric": "ilab::sdg-samples-sec", - }, - { - "id": "6BACDFA8-7139-11EF-9F33-8185DD5B4869", - "num": "1", - "path": None, - "status": "pass", - "iteration": 2, - "primary_metric": "ilab::sdg-samples-sec", - }, + "iteration": 1, + "primary_metric": "ilab::actual-train-seconds", + "primary_period": "measurement", + } ] ), 400: example_error("Parameter error"), @@ -356,7 +360,7 @@ async def run_periods( ) async def iteration_samples( crucible: Annotated[CrucibleService, Depends(crucible_svc)], iteration: str -): +) -> list[dict[str, Any]]: return await crucible.get_samples(iteration=iteration) @@ -405,7 +409,7 @@ async def iteration_samples( ) async def timeline( crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str -): +) -> dict[str, Any]: return await crucible.get_timeline(run) @@ -484,7 +488,7 @@ async def metric_breakouts( examples=["", ","], ), ] = None, -): +) -> dict[str, Any]: return await crucible.get_metric_breakouts(run, metric, names=name, periods=period) @@ -544,12 +548,70 @@ async def metric_data( aggregate: Annotated[ bool, Query(description="Allow aggregation of metrics") ] = False, -): +) -> list[dict[str, Any]]: return await crucible.get_metrics_data( run, metric, names=name, periods=period, aggregate=aggregate ) +@router.post( + "/api/v1/ilab/runs/multisummary", + summary="Returns metric data summaries", + description="Returns a statistical summary of metric data", + responses={ + 200: example_response( + [ + { + "count": 1625, + "min": 0.0, + "max": 375.4, + "avg": 2.364492307692308, + "sum": 3842.3, + "sum_of_squares": 773029.4976, + "variance": 470.11963618840235, + "variance_population": 470.11963618840235, + "variance_sampling": 470.4091187230011, + "std_deviation": 21.68224241605103, + "std_deviation_population": 21.68224241605103, + "std_deviation_sampling": 21.68891695597088, + "std_deviation_bounds": { + "upper": 45.72897713979437, + "lower": -40.99999252440975, + "upper_population": 45.72897713979437, + "lower_population": -40.99999252440975, + "upper_sampling": 45.742326219634066, + "lower_sampling": -41.01334160424945, + }, + "aggregate": True, + "metric": "iostat::operations-merged-sec", + "names": None, + "periods": None, + "run": "26ad48c1-fc9c-404d-bccf-d19755ca8a39", + "title": "iostat::operations-merged-sec {run 2}", + } + ] + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_summary_body( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + summaries: list[Metric], +) -> list[dict[str, Any]]: + return await crucible.get_metrics_summary(summaries) + + @router.get( "/api/v1/ilab/runs/{run}/summary/{metric}", summary="Returns metric data collected for a run", @@ -557,11 +619,32 @@ async def metric_data( responses={ 200: example_response( { - "count": 234, - "min": 7.905045031896648, - "max": 9.666444615077308, - "avg": 9.38298722585416, - "sum": 2195.6190108498736, + "count": 1625, + "min": 0.0, + "max": 375.4, + "avg": 2.364492307692308, + "sum": 3842.3, + "sum_of_squares": 773029.4976, + "variance": 470.11963618840235, + "variance_population": 470.11963618840235, + "variance_sampling": 470.4091187230011, + "std_deviation": 21.68224241605103, + "std_deviation_population": 21.68224241605103, + "std_deviation_sampling": 21.68891695597088, + "std_deviation_bounds": { + "upper": 45.72897713979437, + "lower": -40.99999252440975, + "upper_population": 45.72897713979437, + "lower_population": -40.99999252440975, + "upper_sampling": 45.742326219634066, + "lower_sampling": -41.01334160424945, + }, + "aggregate": True, + "metric": "iostat::operations-merged-sec", + "names": None, + "periods": None, + "run": "26ad48c1-fc9c-404d-bccf-d19755ca8a39", + "title": "iostat::operations-merged-sec {run 2}", } ), 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), @@ -578,7 +661,7 @@ async def metric_data( ), }, ) -async def metric_summary( +async def metric_summary_param( crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str, metric: str, @@ -596,8 +679,18 @@ async def metric_summary( examples=["", ","], ), ] = None, -): - return await crucible.get_metrics_summary(run, metric, names=name, periods=period) + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, +) -> dict[str, Any]: + result = await crucible.get_metrics_summary( + [ + Metric( + run=run, metric=metric, aggregate=aggregate, names=name, periods=period + ) + ] + ) + return result[0] if isinstance(result, list) and len(result) == 1 else result @router.post( @@ -749,10 +842,10 @@ async def metric_graph_param( ): return await crucible.get_metrics_graph( GraphList( - run=run, name=metric, graphs=[ - Graph( + Metric( + run=run, metric=metric, aggregate=aggregate, names=name, diff --git a/backend/app/services/crucible_svc.py b/backend/app/services/crucible_svc.py index fd0d310e..cf9e7990 100644 --- a/backend/app/services/crucible_svc.py +++ b/backend/app/services/crucible_svc.py @@ -23,34 +23,35 @@ from app import config -class Graph(BaseModel): - """Describe a single graph +class Metric(BaseModel): + """Describe a single metric to be graphed or summarized - This represents a JSON object provided by a caller through the get_graph - API to describe a specific metric graph. + This represents a JSON object provided by a caller through the + get_multigraph or get_multisummary APIs to describe a specific + metric. The default title (if the field is omitted) is the metric label with a suffix denoting breakout values selected, any unique parameter values in a selected iteration, and (if multiple runs are selected in any Graph list) an indication of the run index. For example, - "mpstat::Busy-CPU [core=2,type=usr] (batch-size=16)". + "mpstat::Busy-CPU [core=2,type=usr] (batch-size=16) {run 1}". Fields: + run: run ID metric: the metric label, "ilab::train-samples-sec" aggregate: True to aggregate unspecified breakouts color: CSS color string ("green" or "#008000") names: Lock in breakouts periods: Select metrics for specific test period(s) - run: Override the default run ID from GraphList title: Provide a title for the graph. The default is a generated title """ + run: str metric: str aggregate: bool = False color: Optional[str] = None names: Optional[list[str]] = None periods: Optional[list[str]] = None - run: Optional[str] = None title: Optional[str] = None @@ -65,20 +66,18 @@ class GraphList(BaseModel): Normally the X axis will be the actual sample timestamp values; if you specify relative=True, the X axis will be the duration from the first - timestamp of the metric series, in seconds. This allows graphs of similar - runs started at different times to be overlaid. + timestamp of the metric series. This allows graphs of similar runs started + at different times to be overlaid. Fields: - run: Specify the (default) run ID name: Specify a name for the set of graphs - relative: True for relative timescale in seconds + relative: True for relative timescale graphs: a list of Graph objects """ - run: Optional[str] = None name: str relative: bool = False - graphs: list[Graph] + graphs: list[Metric] @dataclass @@ -801,13 +800,8 @@ async def _get_metric_ids( ignore_unavailable=True, ) if len(metrics["hits"]["hits"]) < 1: - raise HTTPException( - status.HTTP_400_BAD_REQUEST, - ( - f"No matches for {metric}" - f"{('+' + ','.join(namelist) if namelist else '')}" - ), - ) + print(f"No metric descs: filters={filters}") + return [] ids = [h["metric_desc"]["id"] for h in self._hits(metrics)] if len(ids) < 2 or aggregate: return ids @@ -851,38 +845,22 @@ async def _build_timestamp_range_filters( Constructs a range filter for the earliest begin timestamp and the latest end timestamp among the specified periods. """ + if periods: ps = self._split_list(periods) matches = await self.search( "period", filters=[{"terms": {"period.id": ps}}] ) - start = None - end = None - for h in self._hits(matches): - p = h["period"] - st = p["begin"] - et = p["end"] - - # If any period is missing a timestamp, use the run's timestamp - # instead to avoid blowing up on a CDM error. - if st is None: - st = h["run"]["begin"] - if et is None: - et = h["run"]["end"] - if st and (not start or st < start): - start = st - if et and (not end or et > end): - end = et - if start is None or end is None: - name = ( - f"{h['run']['benchmark']}:{h['run']['begin']}-" - f"{h['iteration']['num']}-{h['sample']['num']}-" - f"{p['name']}" - ) - raise HTTPException( - status.HTTP_422_UNPROCESSABLE_ENTITY, - f"Unable to compute {name!r} time range {start!r} -> {end!r}", + try: + start = min([int(h) for h in self._hits(matches, ["period", "begin"])]) + end = max([int(h) for h in self._hits(matches, ["period", "end"])]) + except Exception as e: + print( + f"At least one of periods in {ps} lacks a begin or end " + f"timestamp ({str(e)!r}): date filtering by period is " + "disabled, which may produce bad results." ) + return [] return [ {"range": {"metric_data.begin": {"gte": str(start)}}}, {"range": {"metric_data.end": {"lte": str(end)}}}, @@ -911,6 +889,101 @@ async def _get_run_ids( print(f"HITS: {filtered['hits']['hits']}") return set([x for x in self._hits(filtered, ["run", "id"])]) + async def _make_title( + self, + run_id: str, + run_id_list: list[str], + metric_item: Metric, + params_by_run: dict[str, Any], + periods_by_run: dict[str, Any], + ) -> str: + """Compute a default title for a graph + + Use the period, breakout name selections, run list, and iteration + parameters to construct a meaningful name for a metric. + + For example, "ilab::sdg-samples-sec (batch-size=4) {run 1}", or + "mpstat::Busy-CPU [cpu=4]" + + Args: + run_id: the Crucible run ID + run_id_list: ordered list of run IDs in our list of metrics + metric_item: the current MetricItem object + periods: list of aggregation periods, if any + params_by_run: initially empty dict used to cache parameters + periods_by_run: initially empty dict used to cache periods + + Returns: + A string title + """ + names = metric_item.names + metric = metric_item.metric + if metric_item.periods and len(metric_item.periods) == 1: + period = metric_item.periods[0] + else: + period = None + if run_id not in params_by_run: + # Gather iteration parameters outside the loop for help in + # generating useful labels. + all_params = await self.search( + "param", filters=[{"term": {"run.id": run_id}}] + ) + collector = defaultdict(defaultdict) + for h in self._hits(all_params): + collector[h["iteration"]["id"]][h["param"]["arg"]] = h["param"]["val"] + params_by_run[run_id] = collector + else: + collector = params_by_run[run_id] + + if run_id not in periods_by_run: + periods = await self.search( + "period", filters=[{"term": {"run.id": run_id}}] + ) + iteration_periods = defaultdict(list[dict[str, Any]]) + for p in self._hits(periods): + iteration_periods[p["iteration"]["id"]].append(p["period"]) + periods_by_run[run_id] = iteration_periods + else: + iteration_periods = periods_by_run[run_id] + + # We can easily end up with multiple graphs across distinct + # periods or iterations, so we want to be able to provide some + # labeling to the graphs. We do this by looking for unique + # iteration parameters values, since the iteration number and + # period name aren't useful by themselves. + name_suffix = "" + if metric_item.periods: + iteration = None + for i, plist in iteration_periods.items(): + if set(metric_item.periods) <= set([p["id"] for p in plist]): + iteration = i + if period: + for p in plist: + if p["id"] == period: + name_suffix += f" {p['name']}" + + # If the period(s) we're graphing resolve to a single + # iteration in a run with multiple iterations, then we can + # try to find a unique title suffix based on distinct param + # values for that iteration. + if iteration and len(collector) > 1: + unique = collector[iteration].copy() + for i, params in collector.items(): + if i != iteration: + for p in list(unique.keys()): + if p in params and unique[p] == params[p]: + del unique[p] + if unique: + name_suffix += ( + " (" + ",".join([f"{p}={v}" for p, v in unique.items()]) + ")" + ) + + if len(run_id_list) > 1: + name_suffix += f" {{run {run_id_list.index(run_id) + 1}}}" + + options = (" [" + ",".join(names) + "]") if names else "" + return metric + options + name_suffix + async def get_run_filters(self) -> dict[str, dict[str, list[str]]]: """Return possible tag and filter terms @@ -1162,13 +1235,24 @@ async def get_runs( if tag_filters and rid not in tagids: continue - # Collect unique runs: the status is "fail" if any iteration for - # that run ID failed. runs[rid] = run + + # Convert string timestamps (milliseconds from epoch) to int + try: + run["begin"] = int(run["begin"]) + run["end"] = int(run["end"]) + except Exception as e: + print( + f"Unexpected error converting timestamp {run['begin']!r} " + f"or {run['end']!r} to int: {str(e)!r}" + ) run["tags"] = tags.get(rid, {}) run["iterations"] = [] run["primary_metrics"] = set() common = CommonParams() + + # Collect unique iterations: the status is "fail" if any iteration + # for that run ID failed. for i in iterations.get(rid, []): iparams = params.get(i["id"], {}) if "status" not in run: @@ -1285,14 +1369,27 @@ async def get_iterations(self, run: str, **kwargs) -> list[dict[str, Any]]: Returns: A list of iteration documents """ - iterations = await self.search( + hits = await self.search( index="iteration", filters=[{"term": {"run.id": run}}], sort=[{"iteration.num": "asc"}], **kwargs, ignore_unavailable=True, ) - return [i["iteration"] for i in self._hits(iterations)] + + iterations = [] + for i in self._hits(hits, ["iteration"]): + iterations.append( + { + "id": i["id"], + "num": i["num"], + "path": i["path"], + "primary_metric": i["primary-metric"], + "primary_period": i["primary-period"], + "status": i["status"], + } + ) + return iterations async def get_samples( self, run: Optional[str] = None, iteration: Optional[str] = None, **kwargs @@ -1325,6 +1422,7 @@ async def get_samples( sample = s["sample"] sample["iteration"] = s["iteration"]["num"] sample["primary_metric"] = s["iteration"]["primary-metric"] + sample["primary_period"] = s["iteration"]["primary-period"] sample["status"] = s["iteration"]["status"] samples.append(sample) return samples @@ -1375,7 +1473,10 @@ async def get_periods( period = self._format_period(period=h["period"]) period["iteration"] = h["iteration"]["num"] period["sample"] = h["sample"]["num"] - period["primary_metric"] = h["iteration"]["primary-metric"] + is_primary = h["iteration"]["primary-period"] == h["period"]["name"] + period["is_primary"] = is_primary + if is_primary: + period["primary_metric"] = h["iteration"]["primary-metric"] period["status"] = h["iteration"]["status"] body.append(period) return body @@ -1621,10 +1722,10 @@ async def get_metrics_data( "metric_data", size=0, filters=filters, - aggregations={"duration": {"stats": {"field": "metric_data.duration"}}}, + aggregations={"duration": {"min": {"field": "metric_data.duration"}}}, ) if aggdur["aggregations"]["duration"]["count"] > 0: - interval = int(aggdur["aggregations"]["duration"]["min"]) + interval = int(aggdur["aggregations"]["duration"]["value"]) data = await self.search( index="metric_data", size=0, @@ -1658,133 +1759,72 @@ async def get_metrics_data( return response async def get_metrics_summary( - self, - run: str, - metric: str, - names: Optional[list[str]] = None, - periods: Optional[list[str]] = None, - ) -> dict[str, Any]: + self, summaries: list[Metric] + ) -> list[dict[str, Any]]: """Return a statistical summary of metric data Provides a statistical summary of selected data samples. Args: - run: run ID - metric: metric label (e.g., "mpstat::Busy-CPU") - names: list of name filters ("cpu=3") - periods: list of period IDs + summaries: list of Summary objects to define desired metrics Returns: A statistical summary of the selected metric data - - { - "count": 71, - "min": 0.0, - "max": 0.3296, - "avg": 0.02360704225352113, - "sum": 1.676self.BIGQUERY00000001 - } """ start = time.time() - ids = await self._get_metric_ids(run, metric, names, periodlist=periods) - filters = [{"terms": {"metric_desc.id": ids}}] - filters.extend(await self._build_timestamp_range_filters(periods)) - data = await self.search( - "metric_data", - size=0, - filters=filters, - aggregations={"score": {"stats": {"field": "metric_data.value"}}}, - ) - duration = time.time() - start - print(f"Processing took {duration} seconds") - return data["aggregations"]["score"] - - async def _graph_title( - self, - run_id: str, - run_id_list: list[str], - graph: Graph, - params_by_run: dict[str, Any], - periods_by_run: dict[str, Any], - ) -> str: - """Compute a default title for a graph - - Use the period, breakout name selections, run list, and iteration - parameters to construct a meaningful name for a graph. - - For example, "ilab::sdg-samples-sec (batch-size=4) {run 1}", or - "mpstat::Busy-CPU [cpu=4]" - - Args: - run_id: the Crucible run ID - run_id_list: ordered list of run IDs in our list of graphs - graph: the current Graph object - params_by_run: initially empty dict used to cache parameters - periods_by_run: initially empty dict used to cache periods - - Returns: - A string title - """ - names = graph.names - metric = graph.metric - if run_id not in params_by_run: - # Gather iteration parameters outside the loop for help in - # generating useful labels. - all_params = await self.search( - "param", filters=[{"term": {"run.id": run_id}}] + results = [] + params_by_run = {} + periods_by_run = {} + run_id_list = [] + for s in summaries: + if not s.run: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "each summary request must have a run ID", + ) + if s.run not in run_id_list: + run_id_list.append(s.run) + for summary in summaries: + ids = await self._get_metric_ids( + summary.run, + summary.metric, + summary.names, + periodlist=summary.periods, + aggregate=summary.aggregate, ) - collector = defaultdict(defaultdict) - for h in self._hits(all_params): - collector[h["iteration"]["id"]][h["param"]["arg"]] = h["param"]["val"] - params_by_run[run_id] = collector - else: - collector = params_by_run[run_id] - - if run_id not in periods_by_run: - periods = await self.search( - "period", filters=[{"term": {"run.id": run_id}}] + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(summary.periods)) + data = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={ + "score": {"extended_stats": {"field": "metric_data.value"}} + }, ) - iteration_periods = defaultdict(set) - for p in self._hits(periods): - iteration_periods[p["iteration"]["id"]].add(p["period"]["id"]) - periods_by_run[run_id] = iteration_periods - else: - iteration_periods = periods_by_run[run_id] - - # We can easily end up with multiple graphs across distinct - # periods or iterations, so we want to be able to provide some - # labeling to the graphs. We do this by looking for unique - # iteration parameters values, since the iteration number and - # period name aren't useful by themselves. - name_suffix = "" - if graph.periods: - iteration = None - for i, pset in iteration_periods.items(): - if set(graph.periods) <= pset: - iteration = i - break - - # If the period(s) we're graphing resolve to a single - # iteration in a run with multiple iterations, then we can - # try to find a unique title suffix based on distinct param - # values for that iteration. - if iteration and len(collector) > 1: - unique = collector[iteration].copy() - for i, params in collector.items(): - if i != iteration: - for p in list(unique.keys()): - if p in params and unique[p] == params[p]: - del unique[p] - if unique: - name_suffix = ( - " (" + ",".join([f"{p}={v}" for p, v in unique.items()]) + ")" - ) - if len(run_id_list) > 1: - name_suffix += f" {{run {run_id_list.index(run_id) + 1}}}" + # The caller can provide a title for each graph; but, if not, we + # journey down dark overgrown pathways to fabricate a default with + # reasonable context, including unique iteration parameters, + # breakdown selections, and which run provided the data. + if summary.title: + title = summary.title + else: + title = await self._make_title( + summary.run, run_id_list, summary, params_by_run, periods_by_run + ) - options = (" [" + ",".join(names) + "]") if names else "" - return metric + options + name_suffix + score = data["aggregations"]["score"] + score["aggregate"] = summary.aggregate + score["metric"] = summary.metric + score["names"] = summary.names + score["periods"] = summary.periods + score["run"] = summary.run + score["title"] = title + results.append(score) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return results async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: """Return metrics data for a run @@ -1829,7 +1869,6 @@ async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: """ start = time.time() graphlist = [] - default_run_id = graphdata.run layout: dict[str, Any] = {"width": "1500"} axes = {} yaxis = None @@ -1840,23 +1879,16 @@ async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: # Construct a de-duped ordered list of run IDs, starting with the # default. run_id_list = [] - if default_run_id: - run_id_list.append(default_run_id) - run_id_missing = False for g in graphdata.graphs: - if g.run: - if g.run not in run_id_list: - run_id_list.append(g.run) - else: - run_id_missing = True - - if run_id_missing and not default_run_id: - raise HTTPException( - status.HTTP_400_BAD_REQUEST, "each graph request must have a run ID" - ) + if not g.run: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, "each graph request must have a run ID" + ) + if g.run not in run_id_list: + run_id_list.append(g.run) for g in graphdata.graphs: - run_id = g.run if g.run else default_run_id + run_id = g.run names = g.names metric: str = g.metric @@ -1867,7 +1899,7 @@ async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: if g.title: title = g.title else: - title = await self._graph_title( + title = await self._make_title( run_id, run_id_list, g, params_by_run, periods_by_run ) diff --git a/frontend/src/actions/ilabActions.js b/frontend/src/actions/ilabActions.js index c3782735..1cee0fd2 100644 --- a/frontend/src/actions/ilabActions.js +++ b/frontend/src/actions/ilabActions.js @@ -136,9 +136,12 @@ export const fetchGraphData = dispatch({ type: TYPES.GRAPH_LOADING }); let graphs = []; periods?.periods?.forEach((p) => { - graphs.push({ metric: p.primary_metric, periods: [p.id] }); + if (p.is_primary) { + graphs.push({ run: uid, metric: p.primary_metric, periods: [p.id] }); + } if (metric) { graphs.push({ + run: uid, metric, aggregate: true, periods: [p.id], @@ -146,11 +149,19 @@ export const fetchGraphData = } }); const response = await API.post(`/api/v1/ilab/runs/multigraph`, { - run: uid, name: `graph ${uid}`, graphs, }); if (response.status === 200) { + response.data.layout["showlegend"] = true; + response.data.layout["responsive"] = "true"; + response.data.layout["autosize"] = "true"; + response.data.layout["legend"] = { + orientation: "h", + xanchor: "left", + yanchor: "top", + y: -0.1, + }; copyData.push({ uid, data: response.data.data, @@ -207,11 +218,13 @@ export const fetchMultiGraphData = (uids) => async (dispatch, getState) => { uids.forEach(async (uid) => { const periods = filterPeriods.find((i) => i.uid == uid); periods?.periods?.forEach((p) => { - graphs.push({ - run: uid, - metric: p.primary_metric, - periods: [p.id], - }); + if (p.is_primary) { + graphs.push({ + run: uid, + metric: p.primary_metric, + periods: [p.id], + }); + } // graphs.push({ // run: uid, // metric, @@ -230,7 +243,11 @@ export const fetchMultiGraphData = (uids) => async (dispatch, getState) => { response.data.layout["showlegend"] = true; response.data.layout["responsive"] = "true"; response.data.layout["autosize"] = "true"; - response.data.layout["legend"] = { x: 0, y: 1.5 }; + response.data.layout["legend"] = { + orientation: "h", + xanchor: "left", + yanchor: "top", + }; const graphData = []; graphData.push({ data: response.data.data, diff --git a/publish-containers.sh b/publish-containers.sh new file mode 100755 index 00000000..ca97f8b5 --- /dev/null +++ b/publish-containers.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# +# Simple script to build and publish development containers for the backend +# and frontend components. +# +# Please don't edit this file for debugging: it's too easy to accidentally +# commit undesirable changes. Instead, I've added some convenient environment +# variables to support common changes: +# +# REGISTRY -- container registry (docker.io, quay.io) +# ACCOUNT -- user account (presumed to be logged in already) +# REPO -- account container repo (cpt) +# TAG -- container tag (latest) +# SKIP_FRONTEND -- SKIP_FRONTEND=1 to skip building and pushing frontend +# SKIP_BACKEND -- SKIP_BACKEND=1 to skip building and pushing backend +# +REGISTRY=${REGISTRY:-images.paas.redhat.com} +ACCOUNT=${ACCOUNT:-${USER}} +REPO=${REPO:-cpt} +TAG=${TAG:-latest} +SKIP_FRONTEND=${SKIP_FRONTEND:-0} +SKIP_BACKEND=${SKIP_BACKEND:-0} + +REPOSITORY=${REGISTRY}/${ACCOUNT}/${REPO} +# remove current images, if any +podman rm -f front back +now=$(date +'%Y%m%d-%H%M%S') + +if [ "$SKIP_BACKEND" != 1 ] ;then + podman build -f backend/backend.containerfile --tag backend + podman push backend "${REPOSITORY}/backend:${TAG}" + podman push backend "${REPOSITORY}/backend:${now}" +fi + +if [ "$SKIP_FRONTEND" != 1 ] ;then + podman build -f frontend/frontend.containerfile --tag frontend + podman push frontend "${REPOSITORY}/frontend:${TAG}" + podman push frontend "${REPOSITORY}/frontend:${now}" +fi From 1245407cce84ec2dd7f0d7f5dbcd670ccbb9d6df Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 7 Nov 2024 11:48:29 -0500 Subject: [PATCH 7/7] Display statistical summaries Add statistics charts for selected metric in row expansion and comparison views. --- frontend/src/actions/ilabActions.js | 74 +++++++++++++++ frontend/src/actions/types.js | 3 + .../components/templates/ILab/ILabSummary.jsx | 89 +++++++++++++++++++ .../templates/ILab/IlabCompareComponent.jsx | 43 ++++++--- .../templates/ILab/IlabExpandedRow.jsx | 14 ++- .../templates/ILab/MetricsDropdown.jsx | 16 ++-- frontend/src/reducers/ilabReducer.js | 14 +++ 7 files changed, 228 insertions(+), 25 deletions(-) create mode 100644 frontend/src/components/templates/ILab/ILabSummary.jsx diff --git a/frontend/src/actions/ilabActions.js b/frontend/src/actions/ilabActions.js index 1cee0fd2..bca6bdd3 100644 --- a/frontend/src/actions/ilabActions.js +++ b/frontend/src/actions/ilabActions.js @@ -121,6 +121,80 @@ export const fetchPeriods = (uid) => async (dispatch) => { dispatch({ type: TYPES.COMPLETED }); }; +export const fetchSummaryData = + (uid, metric = null) => + async (dispatch, getState) => { + try { + const periods = getState().ilab.periods.find((i) => i.uid == uid); + const metrics = getState().ilab.metrics_selected[uid]; + dispatch({ type: TYPES.SET_ILAB_SUMMARY_LOADING }); + let summaries = []; + periods?.periods?.forEach((p) => { + if (p.is_primary) { + summaries.push({ + run: uid, + metric: p.primary_metric, + periods: [p.id], + }); + } + if (metric) { + summaries.push({ + run: uid, + metric, + aggregate: true, + periods: [p.id], + }); + } + }); + const response = await API.post( + `/api/v1/ilab/runs/multisummary`, + summaries + ); + if (response.status === 200) { + dispatch({ + type: TYPES.SET_ILAB_SUMMARY_DATA, + payload: { uid, data: response.data }, + }); + } + } catch (error) { + console.error( + `ERROR (${error?.response?.status}): ${JSON.stringify( + error?.response?.data + )}` + ); + dispatch(showFailureToast()); + } + dispatch({ type: TYPES.SET_ILAB_SUMMARY_COMPLETE }); + }; + +export const handleSummaryData = + (uids, metric = null) => + async (dispatch, getState) => { + try { + const periods = getState().ilab.periods; + const pUids = periods.map((i) => i.uid); + const missingPeriods = uids.filter(function (x) { + return pUids.indexOf(x) < 0; + }); + console.log(`Missing periods for ${missingPeriods}`); + await Promise.all( + missingPeriods.map(async (uid) => { + console.log(`Fetching periods for ${uid}`); + await dispatch(fetchPeriods(uid)); // Dispatch each item + }) + ); + await Promise.all( + uids.map(async (uid) => { + console.log(`Fetching summary data for ${uid}`); + await dispatch(fetchSummaryData(uid, metric)); + }) + ); + } catch (error) { + console.error(`ERROR: ${JSON.stringify(error)}`); + dispatch(showFailureToast()); + } + }; + export const fetchGraphData = (uid, metric = null) => async (dispatch, getState) => { diff --git a/frontend/src/actions/types.js b/frontend/src/actions/types.js index f7e21fec..70a67873 100644 --- a/frontend/src/actions/types.js +++ b/frontend/src/actions/types.js @@ -82,6 +82,9 @@ export const SET_ILAB_JOBS_DATA = "SET_ILAB_JOBS_DATA"; export const SET_ILAB_DATE_FILTER = "SET_ILAB_DATE_FILTER"; export const SET_ILAB_GRAPH_DATA = "SET_ILAB_GRAPH_DATA"; export const SET_ILAB_MULTIGRAPH_DATA = "SET_ILAB_MULTIGRAPH_DATA"; +export const SET_ILAB_SUMMARY_LOADING = "SET_ILAB_SUMMARY_LOADING"; +export const SET_ILAB_SUMMARY_COMPLETE = "SET_ILAB_SUMMARY_COMPLETE"; +export const SET_ILAB_SUMMARY_DATA = "SET_ILAB_SUMMARY_DATA"; export const SET_ILAB_TOTAL_ITEMS = "SET_ILAB_TOTAL_ITEMS"; export const SET_ILAB_OFFSET = "SET_ILAB_OFFSET"; export const SET_ILAB_PAGE = "SET_ILAB_PAGE"; diff --git a/frontend/src/components/templates/ILab/ILabSummary.jsx b/frontend/src/components/templates/ILab/ILabSummary.jsx new file mode 100644 index 00000000..25526a7d --- /dev/null +++ b/frontend/src/components/templates/ILab/ILabSummary.jsx @@ -0,0 +1,89 @@ +import PropType from "prop-types"; +import { uid } from "@/utils/helper"; +import { useSelector } from "react-redux"; +import { Table, Tbody, Th, Thead, Tr, Td } from "@patternfly/react-table"; + +const ILabSummary = (props) => { + const { ids } = props; + const { isSummaryLoading, summaryData } = useSelector((state) => state.ilab); + + const getSummaryData = (id) => { + const data = summaryData?.find((a) => a.uid === id); + return data; + }; + const hasSummaryData = (ids) => { + const hasData = Boolean( + summaryData.filter((i) => ids.includes(i.uid)).length === ids.length + ); + return hasData; + }; + + return ( +
+ {hasSummaryData(ids) ? ( + + + + {ids.length > 1 ? : <>} + + + + + + + + + {ids.map((id, ridx) => + getSummaryData(id).data.map((stat, sidx) => ( + + {ids.length > 1 && sidx === 0 ? ( + + ) : undefined} + + + + + + + )) + )} + +
RunMetricMinAverageMaxStandard Deviation
{ridx + 1}{stat.title} + {typeof stat.min === "number" + ? stat.min.toPrecision(6) + : stat.min} + + {typeof stat.avg === "number" + ? stat.avg.toPrecision(6) + : stat.avg} + + {typeof stat.max === "number" + ? stat.max.toPrecision(6) + : stat.max} + + {typeof stat.std_deviation === "number" + ? stat.std_deviation.toPrecision(6) + : stat.std_deviation} +
+ ) : isSummaryLoading && !hasSummaryData(ids) ? ( +
+ ) : ( + <> + )} +
+ ); +}; + +ILabSummary.propTypes = { + item: PropType.object, +}; +export default ILabSummary; diff --git a/frontend/src/components/templates/ILab/IlabCompareComponent.jsx b/frontend/src/components/templates/ILab/IlabCompareComponent.jsx index c062be13..b2c5eb8f 100644 --- a/frontend/src/components/templates/ILab/IlabCompareComponent.jsx +++ b/frontend/src/components/templates/ILab/IlabCompareComponent.jsx @@ -7,6 +7,8 @@ import { MenuItem, MenuItemAction, MenuList, + Stack, + StackItem, Title, } from "@patternfly/react-core"; import { useDispatch, useSelector } from "react-redux"; @@ -16,18 +18,20 @@ import Plot from "react-plotly.js"; import PropTypes from "prop-types"; import RenderPagination from "@/components/organisms/Pagination"; import { cloneDeep } from "lodash"; -import { handleMultiGraph } from "@/actions/ilabActions.js"; +import { handleMultiGraph, handleSummaryData } from "@/actions/ilabActions.js"; import { uid } from "@/utils/helper"; import { useState } from "react"; +import ILabSummary from "./ILabSummary"; const IlabCompareComponent = () => { - // const { data } = props; const { page, perPage, totalItems, tableData } = useSelector( (state) => state.ilab ); const dispatch = useDispatch(); const [selectedItems, setSelectedItems] = useState([]); - const { multiGraphData } = useSelector((state) => state.ilab); + const { multiGraphData, summaryData, isSummaryLoading } = useSelector( + (state) => state.ilab + ); const isGraphLoading = useSelector((state) => state.loading.isGraphLoading); const graphDataCopy = cloneDeep(multiGraphData); @@ -40,6 +44,7 @@ const IlabCompareComponent = () => { } }; const dummy = () => { + dispatch(handleSummaryData(selectedItems)); dispatch(handleMultiGraph(selectedItems)); }; return ( @@ -91,22 +96,32 @@ const IlabCompareComponent = () => { type={"ilab"} /> -
- {isGraphLoading ? ( -
- ) : graphDataCopy?.length > 0 && - graphDataCopy?.[0]?.data?.length > 0 ? ( -
+ + + {isSummaryLoading ? ( +
+ ) : summaryData.filter((i) => selectedItems.includes(i.uid)).length == + selectedItems.length ? ( + + ) : ( +
No data to summarize
+ )} +
+ + {isGraphLoading ? ( +
+ ) : graphDataCopy?.length > 0 && + graphDataCopy?.[0]?.data?.length > 0 ? ( -
- ) : ( -
No data to compare
- )} -
+ ) : ( +
No data to compare
+ )} + + ); }; diff --git a/frontend/src/components/templates/ILab/IlabExpandedRow.jsx b/frontend/src/components/templates/ILab/IlabExpandedRow.jsx index bcdbcc33..495a88d7 100644 --- a/frontend/src/components/templates/ILab/IlabExpandedRow.jsx +++ b/frontend/src/components/templates/ILab/IlabExpandedRow.jsx @@ -5,12 +5,15 @@ import { AccordionToggle, Card, CardBody, + Stack, + StackItem, } from "@patternfly/react-core"; import { useDispatch, useSelector } from "react-redux"; import ILabGraph from "./ILabGraph"; import MetaRow from "./MetaRow"; import MetricsSelect from "./MetricsDropdown"; +import ILabSummary from "./ILabSummary"; import PropTypes from "prop-types"; import { setMetaRowExpanded } from "@/actions/ilabActions"; import { uid } from "@/utils/helper"; @@ -134,9 +137,14 @@ const IlabRowContent = (props) => { >
Metrics:
-
- -
+ + + + + + + + diff --git a/frontend/src/components/templates/ILab/MetricsDropdown.jsx b/frontend/src/components/templates/ILab/MetricsDropdown.jsx index 04568f0f..9ebe96cf 100644 --- a/frontend/src/components/templates/ILab/MetricsDropdown.jsx +++ b/frontend/src/components/templates/ILab/MetricsDropdown.jsx @@ -5,7 +5,7 @@ import { SelectOption, Skeleton } from "@patternfly/react-core"; -import { fetchGraphData, setSelectedMetrics } from "@/actions/ilabActions"; +import { fetchGraphData, fetchSummaryData, setSelectedMetrics } from "@/actions/ilabActions"; import { useDispatch, useSelector } from "react-redux"; import PropTypes from "prop-types"; @@ -39,11 +39,11 @@ const MetricsSelect = (props) => { }; const onSelect = (_event, value) => { console.log("selected", value); - const run = value.split("*"); - //setSelected(run[1].trim()); - dispatch(setSelectedMetrics(run[0].trim(), run[1].trim())); + const [run, metric] = value; + dispatch(setSelectedMetrics(run, metric)); + dispatch(fetchGraphData(run, metric)); + dispatch(fetchSummaryData(run, metric)); setIsOpen(false); - dispatch(fetchGraphData(run[0].trim(), run[1].trim())); }; const metricsDataCopy = cloneDeep(metrics); @@ -70,12 +70,12 @@ const MetricsSelect = (props) => { shouldFocusToggleOnSelect > - {getMetricsData(item.id)[0]?.metrics.map((unit) => ( + {getMetricsData(item.id)[0]?.metrics.map((metric) => ( - {unit} + {metric} ))} diff --git a/frontend/src/reducers/ilabReducer.js b/frontend/src/reducers/ilabReducer.js index f8cd5b0d..d57c847b 100644 --- a/frontend/src/reducers/ilabReducer.js +++ b/frontend/src/reducers/ilabReducer.js @@ -6,6 +6,8 @@ const initialState = { end_date: "", graphData: [], multiGraphData: [], + isSummaryLoading: false, + summaryData: [], totalItems: 0, page: 1, perPage: 10, @@ -62,6 +64,18 @@ const ILabReducer = (state = initialState, action = {}) => { return { ...state, comparisonSwitch: !state.comparisonSwitch }; case TYPES.SET_EXPANDED_METAROW: return { ...state, metaRowExpanded: payload }; + case TYPES.SET_ILAB_SUMMARY_LOADING: + return { ...state, isSummaryLoading: true }; + case TYPES.SET_ILAB_SUMMARY_COMPLETE: + return { ...state, isSummaryLoading: false }; + case TYPES.SET_ILAB_SUMMARY_DATA: + return { + ...state, + summaryData: [ + ...state.summaryData.filter((i) => i.uid !== payload.uid), + payload, + ], + }; default: return state; }