Merge pull request #122 from edwarddavidbaker/create_perf_json_pathlib
create_perf_json: Migrate to pathlib.Path for file operations
edwarddavidbaker authored Dec 1, 2023
2 parents 9481de5 + a37b9de commit 5e5b20f
Showing 2 changed files with 76 additions and 68 deletions.
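
The diff below follows one pattern throughout create_perf_json.py: reads that previously went through urllib.request.urlopen on file:// URLs now use pathlib.Path plus a plain open(). A minimal sketch of the before/after shape, assuming the repo layout the script already relies on (the skylake event file name is hypothetical and used only for illustration):

from pathlib import Path
import json

# Resolve the repo root relative to this script, as the new main() does.
scriptdir = Path(__file__).resolve().parent
basepath = scriptdir.parent

# Old style (removed): build a file:// URL and decode bytes by hand, e.g.
#   url = f'file://{os.getcwd()}/../SKL/events/skylake_core.json'
#   with urllib.request.urlopen(url) as f:
#       data = json.load(f)

# New style: join segments with Path and open the file directly as text.
event_path = Path(basepath, 'SKL', 'events', 'skylake_core.json')  # hypothetical file name
if event_path.is_file():
    with open(event_path, 'r') as event_json:
        data = json.load(event_json)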
4 changes: 4 additions & 0 deletions .github/workflows/create-perf-json.yml
@@ -27,6 +27,10 @@ jobs:
with:
python-version: "3.x"

- name: Run unittests
working-directory: ./scripts/unittesting
run: python metric_test.py

- name: Create perf json files
working-directory: ./scripts
run: python create_perf_json.py -v
140 changes: 72 additions & 68 deletions scripts/create_perf_json.py
@@ -3,7 +3,6 @@
#
# create_perf_json.py
# --outdir <Output directory where files are written - default perf>
# --basepath <Base directory of event, metric and other files - default '..' >
# --verbose/-v/-vv/-vvv <Print verbosity during generation>
#
# ASSUMES: That the script is being run in the scripts folder of the repo.
@@ -17,10 +16,9 @@
from itertools import takewhile
import json
import metric
import os
from pathlib import Path
import re
from typing import DefaultDict, Dict, Optional, Set, TextIO, Tuple
import urllib.request

_verbose = 0
def _verboseprintX(level:int, *args, **kwargs):
@@ -480,7 +478,7 @@ class Model:
Data related to 1 CPU model such as Skylake or Broadwell.
"""
def __init__(self, shortname: str, longname: str, version: str,
models: Set[str], files: Dict[str, str]):
models: Set[str], files: Dict[str, Path]):
"""
Constructs a model.
@@ -504,7 +502,7 @@ def __lt__(self, other: 'Model') -> bool:

def __str__(self):
return f'{self.shortname} / {self.longname}\n\tmodels={self.models}\n\tfiles:\n\t\t' + \
'\n\t\t'.join([f'{type} = {url}' for (type, url) in self.files.items()])
'\n\t\t'.join([f'{type} = {path}' for (type, path) in self.files.items()])

def mapfile_line(self) -> str:
"""
@@ -720,7 +718,7 @@ class PerfMetric:
thresholds: Dict[str, str] = {}
issue_to_metrics: Dict[str, Set[str]] = collections.defaultdict(set)
found_key = False
csvf = csv.reader([l.decode('utf-8') for l in csvfile.readlines()])
csvf = csv.reader(csvfile)
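# (Illustrative comment) csvfile is now the text-mode file object opened by the
# caller (see the open() calls later in this diff), so csv.reader consumes it
# directly and the per-line .decode('utf-8') of the urllib version is no longer needed.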
for l in csvf:
if l[0] == 'Key':
found_key = True
@@ -1465,7 +1463,7 @@ def append_to_desc(s: str):
})

if 'extra metrics' in self.files:
with urllib.request.urlopen(self.files['extra metrics']) as extra_json:
with open(self.files['extra metrics'], 'r') as extra_json:
broken_metrics = {
'ICX': {
# Missing event: UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD
@@ -1497,7 +1495,7 @@ def append_to_desc(s: str):
return jo


def to_perf_json(self, outdir: str):
def to_perf_json(self, outdir: Path):
# Map from a topic to its list of events as dictionaries.
pmon_topic_events: Dict[str, list[Dict[str, str]]] = \
collections.defaultdict(list)
@@ -1516,7 +1514,7 @@ def to_perf_json(self, outdir: str):
pmu_prefix = None
if event_type in ['atom', 'core']:
pmu_prefix = f'cpu_{event_type}' if 'atom' in self.files else 'cpu'
with urllib.request.urlopen(self.files[event_type]) as event_json:
with open(self.files[event_type], 'r') as event_json:
json_data = json.load(event_json)
# UNC_IIO_BANDWIDTH_OUT events are broken on Linux pre-SPR so skip if they exist.
pmon_events = [PerfmonJsonEvent(self.shortname, pmu_prefix, x)
@@ -1551,11 +1549,8 @@ def to_perf_json(self, outdir: str):

if 'uncore csv' in self.files:
_verboseprint2(f'Rewriting events with {self.files["uncore csv"]}')
with urllib.request.urlopen(self.files['uncore csv']) as uncore_csv:
csv_lines = [
l.decode('utf-8') for l in uncore_csv.readlines()
]
csvfile = csv.reader(csv_lines)
with open(self.files['uncore csv'], 'r') as uncore_csv:
csvfile = csv.reader(uncore_csv)
for l in csvfile:
while len(l) < 7:
l.append('')
@@ -1642,8 +1637,8 @@ def to_perf_json(self, outdir: str):

for topic, events_ in pmon_topic_events.items():
events_ = sorted(events_, key=lambda event: event['EventName'])
filename = f'{topic.lower().replace(" ", "-")}.json'
with open(f'{outdir}/{filename}', 'w', encoding='ascii') as perf_json:
output_path = Path(outdir, f'{topic.lower().replace(" ", "-")}.json')
with open(output_path, 'w', encoding='ascii') as perf_json:
json.dump(events_, perf_json, sort_keys=True, indent=4,
separators=(',', ': '))
perf_json.write('\n')
@@ -1654,7 +1649,7 @@ def to_perf_json(self, outdir: str):
if metric_csv_key not in self.files:
continue
pmu_prefix = unit if 'atom' in self.files else 'cpu'
with urllib.request.urlopen(self.files[metric_csv_key]) as metric_csv:
with open(self.files[metric_csv_key], 'r') as metric_csv:
csv_metrics = self.extract_tma_metrics(metric_csv, pmu_prefix, events)
csv_metrics = sorted(csv_metrics,
key=lambda m: (m['Unit'] if 'Unit' in m else 'cpu',
@@ -1673,14 +1668,14 @@ def to_perf_json(self, outdir: str):
key=lambda m: (m['Unit'] if 'Unit' in m else 'cpu',
m['MetricName'])
)
with open(f'{outdir}/{self.shortname.lower().replace("-","")}-metrics.json',
'w', encoding='ascii') as perf_metric_json:
output_path = Path(outdir, f'{self.shortname.lower().replace("-","")}-metrics.json')
with open(output_path, 'w', encoding='ascii') as perf_metric_json:
json.dump(metrics, perf_metric_json, sort_keys=True, indent=4,
separators=(',', ': '))
perf_metric_json.write('\n')

if self.metricgroups:
with open(f'{outdir}/metricgroups.json', 'w', encoding='ascii') as metricgroups_json:
with open(Path(outdir, 'metricgroups.json'), 'w', encoding='ascii') as metricgroups_json:
json.dump(self.metricgroups, metricgroups_json, sort_keys=True, indent=4,
separators=(',', ': '))
metricgroups_json.write('\n')
Expand All @@ -1691,7 +1686,7 @@ class Mapfile:
The read representation of mapfile.csv.
"""

def __init__(self, base_path: str):
def __init__(self, base_path: Path):
self.archs = []
# Map from shortname (like SKL) to longname (like Skylake).
longnames: Dict[str, str] = {}
@@ -1700,16 +1695,14 @@ def __init__(self, base_path: str):
models: DefaultDict[str, Set[str]] = collections.defaultdict(set)
# Map from shortname to a map from a kind of file to the path
# of that file.
files: Dict[str, Dict[str, str]] = collections.defaultdict(dict)
files: Dict[str, Dict[str, Path]] = collections.defaultdict(dict)
# Map from shortname to the version of the event files.
versions: Dict[str, str] = {}

_verboseprint(f'Opening: {base_path}/mapfile.csv')
with urllib.request.urlopen(f'{base_path}/mapfile.csv') as mapfile_csv:
mapfile_csv_lines = [
l.decode('utf-8') for l in mapfile_csv.readlines()
]
mapfile = csv.reader(mapfile_csv_lines)
mapfile_path = Path(base_path, 'mapfile.csv')
_verboseprint(f'Opening: {mapfile_path}')
with open(mapfile_path, 'r') as mapfile_csv:
mapfile = csv.reader(mapfile_csv)
first_row = True
for l in mapfile:
while len(l) < 7:
@@ -1733,7 +1726,9 @@ def __init__(self, base_path: str):
# longname (like Skylake).
shortname = re.sub(r'/([^/]*)/.*', r'\1', path)
longname = re.sub(rf'/{shortname}/events/([^_]*)_.*', r'\1', path)
url = base_path + path

# Drop leading slash before combining with base path.
filepath = Path(base_path, path[1:])
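# (Illustrative comment) Path() discards every segment that precedes an absolute
# one, e.g. Path('..', '/SKL/events/x.json') == Path('/SKL/events/x.json'), so the
# leading '/' from mapfile.csv paths is stripped to keep the join under base_path.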

# Workarounds:
if family_model == 'GenuineIntel-6-BE':
@@ -1769,79 +1764,88 @@ def __init__(self, base_path: str):
assert versions[shortname] == version
models[shortname].add(family_model)
if shortname in files and event_type in files[shortname]:
assert files[shortname][event_type] == url, \
f'Expected {shortname}/{longname} to have just 1 {event_type} url {files[shortname][event_type]} but found {url}'
assert files[shortname][event_type] == filepath, \
f'Expected {shortname}/{longname} to have just 1 {event_type} filepath {files[shortname][event_type]} but found {filepath}'
else:
files[shortname][event_type] = url
files[shortname][event_type] = filepath

for (shortname, longname) in longnames.items():
# Add uncore CSV file if it exists.
try:
uncore_csv_url = f'{base_path}/scripts/config/perf-uncore-events-{shortname.lower()}.csv'
urllib.request.urlopen(uncore_csv_url)
files[shortname]['uncore csv'] = uncore_csv_url
except:
pass
uncore_csv_path = Path(base_path, 'scripts', 'config',
f'perf-uncore-events-{shortname.lower()}.csv')
if uncore_csv_path.is_file():
files[shortname]['uncore csv'] = uncore_csv_path

# Add metric files that will be used for each model.
files[shortname]['tma metrics'] = base_path + '/TMA_Metrics-full.csv'
files[shortname]['tma metrics'] = Path(base_path, 'TMA_Metrics-full.csv')
if shortname == 'ADLN':
files[shortname]['tma metrics'] = base_path + '/E-core_TMA_Metrics.csv'
files[shortname]['tma metrics'] = Path(base_path, 'E-core_TMA_Metrics.csv')
if 'atom' in files[shortname]:
files[shortname][
'e-core tma metrics'] = base_path + '/E-core_TMA_Metrics.csv'
cpu_metrics_url = f'{base_path}/{shortname}/metrics/perf/{longname.lower()}_metrics_perf.json'
try:
urllib.request.urlopen(cpu_metrics_url)
_verboseprint2(f'Found {cpu_metrics_url}')
files[shortname]['extra metrics'] = cpu_metrics_url
except:
_verboseprint2(f'Didn\'t find {cpu_metrics_url}')
if shortname in ['BDX','CLX','HSX','ICX','SKX','SPR']:
files[shortname]['e-core tma metrics'] = Path(base_path, 'E-core_TMA_Metrics.csv')

cpu_metrics_path = Path(base_path, shortname, 'metrics', 'perf',
f'{longname.lower()}_metrics_perf.json')
if cpu_metrics_path.is_file():
_verboseprint2(f'Found {cpu_metrics_path}')
files[shortname]['extra metrics'] = cpu_metrics_path
else:
_verboseprint2(f'Didn\'t find {cpu_metrics_path}')
if shortname in ['BDX', 'CLX', 'HSX', 'ICX', 'SKX', 'SPR']:
raise
else:
pass

self.archs += [
Model(shortname, longname, versions[shortname],
models[shortname], files[shortname])
Model(shortname, longname, versions[shortname], models[shortname], files[shortname])
]
self.archs.sort()
_verboseprint2('Parsed models:\n' + str(self))

def __str__(self):
return ''.join(str(model) for model in self.archs)

def to_perf_json(self, outdir: str):
def to_perf_json(self, outdir: Path):
"""
Create a perf style mapfile.csv.
"""
_verboseprint(f'Writing mapfile to {outdir}/mapfile.csv')
gen_mapfile = open(f'{outdir}/mapfile.csv', 'w', encoding='ascii')
for model in self.archs:
gen_mapfile.write(model.mapfile_line() + '\n')
output_mapfile_path = Path(outdir, 'mapfile.csv')
_verboseprint(f'Writing mapfile to {output_mapfile_path}')
with open(output_mapfile_path, 'w', encoding='ascii') as gen_mapfile:
for model in self.archs:
gen_mapfile.write(model.mapfile_line() + '\n')

for model in self.archs:
modeldir = outdir + '/' + model.longname
modeldir = Path(outdir, model.longname)
_verboseprint(f'Creating event json for {model.shortname} in {modeldir}')
os.system(f'mkdir -p {modeldir}')
modeldir.mkdir(exist_ok=True)
model.to_perf_json(modeldir)


def main():
ap = argparse.ArgumentParser()
ap.add_argument('--outdir', default='perf',
scriptdir = Path(__file__).resolve().parent
basepath = scriptdir.parent
default_outdir = Path(scriptdir, 'perf')

ap = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
ap.add_argument('--outdir',
default=default_outdir,
type=Path,
help='Directory to write output to.')
ap.add_argument('--basepath', default=f'file://{os.getcwd()}/..',
help='Base directory containing event, metric and other files.')
ap.add_argument('--verbose', '-v', action='count', default=0, dest='verbose',
ap.add_argument('--verbose',
'-v',
action='count',
default=0,
dest='verbose',
help='Additional output when running.')
args = ap.parse_args()

global _verbose
_verbose = args.verbose
os.system(f'mkdir -p {args.outdir}')
Mapfile(args.basepath).to_perf_json(args.outdir)

outdir = args.outdir.resolve()
if outdir.exists() and not outdir.is_dir():
raise IOError(f'Output directory argument {outdir} exists but is not a directory.')
outdir.mkdir(exist_ok=True)

Mapfile(basepath).to_perf_json(outdir)

if __name__ == '__main__':
main()
