-
Notifications
You must be signed in to change notification settings - Fork 33
/
build_datastructure_doc.py
333 lines (252 loc) · 10.7 KB
/
build_datastructure_doc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
"""Create datastructure documentation page.
This will add a page with various svg graphs and html tables
describing the datastructure: dependencies, columns provided,
and configuration options that apply to each plugin.
For extra credit, the SVGs are clickable.
"""
from collections import defaultdict
import os
import shutil
from immutabledict import immutabledict
import numpy as np
import pandas as pd
import graphviz
import strax
import straxen
from straxen import kind_colors
from straxen.docs_utils import add_spaces, add_deps_to_graph_tree
# Directory containing this script (path resolved with os.path.realpath).
# Output paths used further down in this file are built relative to it.
this_dir = os.path.dirname(os.path.realpath(__file__))
# reST preamble placed at the top of each generated datastructure page.
# Filled in via str.format with the fields ``title`` and ``context``.
page_header = """
{title}
===========================================================
This page is an autogenerated reference for all the plugins in straxen's
`{context}` context.
The figures presented are dependency diagrams, meaning that an arrow stemming from a node points to a node that the stem depends on (i.e. "A depends on B" is equivalent to "A ---> B").
Colors indicate data kinds. To load tables with different data kinds,
you currently need more than one `get_df` (or `get_array`) commands.
"""
# reST section emitted once per data type. Filled in via str.format with the
# fields ``data_type``, ``p`` (the plugin instance; only its class name is
# used), ``module``, ``kind``, ``docstring``, ``columns``, ``svg`` and
# ``config_options``.
# NOTE(review): each ``.. raw:: html`` line is followed directly by its
# placeholder line with no blank line in between; reST directives normally
# need a blank line before their content -- TODO confirm the page renders.
template = """
{data_type}
--------------------------------------------------------
Description
~~~~~~~~~~~~~~~~~~~~~~
Provided by plugin: `{p.__class__.__name__} <https://github.com/XENONnT/straxen/blob/master/{module}.py>`_
Data kind: {kind}
{docstring}
Columns provided
~~~~~~~~~~~~~~~~~~~~~~
.. raw:: html
{columns}
Dependencies
~~~~~~~~~~~~~~~~~~~~~~
.. raw:: html
{svg}
Configuration options
~~~~~~~~~~~~~~~~~~~~~~~
These are all options that affect this data type.
This also includes options taken by dependencies of this datatype,
because changing any of those options affect this data indirectly.
.. raw:: html
{config_options}
------------------
"""
# reST preamble of the data-kinds page. Filled in via str.format with the
# single field ``svg`` (the overall data-kind dependency graph).
data_kinds_header = """
XENONnT data kinds
====================
As explained in the
`demo <https://straxen.readthedocs.io/en/latest/tutorials/strax_demo.html>`_,
in straxen, we have **data types** and **data kinds**. The **data types** are
documented in `the datastructure <https://straxen.readthedocs.io/en/latest/reference/datastructure.html>`_
page and are the type of data that one can load in straxen using
``st.get_array(<RUN_ID>, <DATA_TYPE>)`` or ``st.get_df(<RUN_ID>, <DATA_TYPE>)``.
Additionally, each data type also has a data kind. Each data kinds has a group
of data types associated to it. All data of a given data type has the same number
of entities. As such, different data types can be loaded simultaneously if they
are of the same data kind. For example, `peak_basics` and `peak_positions` are
two data types but they contain information about the same data kind: `peaks`.
When writing a plugin, the ``plugin.compute(self, <DATA KIND>)`` method takes the **data kind**.
--------------------------------------------------------
.. raw:: html
{svg}
"""
# Page title per detector suffix; the empty suffix refers to the TPC.
titles = {
    "": "Straxen datastructure",
    "_he": "Straxen datastructure for high energy channels",
    "_nv": "Straxen datastructure for neutron veto",
    "_mv": "Straxen datastructure for muon veto",
}
# Suffixes for which a separate datastructure page is generated.
tree_suffices = list(titles)
suffices = ["_he", "_nv", "_mv"]
# Give every known data kind a suffixed alias with the same colour.
for suffix in suffices:
    # Snapshot the keys before inserting, so the dict is not resized while
    # being iterated. The snapshot is retaken for each suffix, so aliases added
    # in an earlier pass are aliased again (e.g. ``<kind>_he_nv``).
    to_copy = list(kind_colors)
    for c in to_copy:
        kind_colors[c + suffix] = kind_colors[c]
def get_plugins_deps(st):
    """Group the registered data types per detector suffix and plugin count.

    For each suffix in ``tree_suffices`` the names in
    ``st._plugin_class_registry`` that contain the suffix are selected. For the
    empty suffix, names containing any non-empty suffix are skipped. Every
    selected name is filed under the number of plugins that ``st._get_plugins``
    returns for it.

    :param st: Strax.Context
    :return: dict mapping suffix -> defaultdict(list) mapping the number of
        plugins returned by ``st._get_plugins`` -> list of data type names.

    """
    other_suffices = [s for s in tree_suffices if s != ""]
    plugins_by_deps = {}
    for det_suffix in tree_suffices:
        grouped = plugins_by_deps[det_suffix] = defaultdict(list)
        is_default_tree = det_suffix == ""
        for plugin_name in st._plugin_class_registry.keys():
            if det_suffix not in plugin_name:
                continue
            if is_default_tree and any(s in plugin_name for s in other_suffices):
                # Belongs to one of the suffixed trees, not to the default one
                continue
            n_plugins = len(st._get_plugins((plugin_name,), run_id="0"))
            # Clear cache, otherwise we might be getting more than we asked for from the cache
            st._fixed_plugin_cache = {}
            grouped[n_plugins].append(plugin_name)
    return plugins_by_deps
def get_context():
    """Create the context used for building the documentation.

    The context is created with ``_database_init=False`` because initializing
    the runs_db requires the appropriate passwords. Its ``forbid_creation_of``
    option is set to the data types provided by the DAQReader.

    :return: straxen context that mimics the xenonnt_online context without the rundb init

    """
    context = straxen.contexts.xenonnt_online(_database_init=False)
    daq_reader_provides = straxen.daqreader.DAQReader.provides
    context.context_config["forbid_creation_of"] = daq_reader_provides
    return context
def _shorten_default(value):
    """Abbreviate a string longer than 30 characters to its first and last 10 characters."""
    if isinstance(value, str) and len(value) > 30:
        return value[:10] + "..." + value[-10:]
    return value


def _data_type_section(st, this_data_type, graph_dir):
    """Render the dependency graph of one data type and return its formatted reST section."""
    this_plugin = st._get_plugins(targets=(this_data_type,), run_id="0")[this_data_type]

    # Create dependency graph and add plugins and dependencies recursively
    graph_tree = graphviz.Digraph(format="svg")
    add_deps_to_graph_tree(graph_tree, this_plugin, this_data_type)

    # Where to save this node
    fn = graph_dir + "/" + this_data_type
    graph_tree.render(fn)
    with open(f"{fn}.svg", mode="r") as f:
        # The first five lines of the rendered file are dropped
        svg = add_spaces(f.readlines()[5:])

    config_df = st.show_config(this_data_type).sort_values(by="option")

    # Filter out the config options of lower level datatypes
    config_mask = [
        any(this_data_type in a for a in ap_to) for ap_to in config_df["applies_to"].values
    ]
    keep_cols = ["option", "default", "current", "help"]
    # Explicit copy: the column assignment below must not target a slice of the
    # frame returned by show_config (avoids pandas' SettingWithCopyWarning)
    config_df = config_df[config_mask][keep_cols].copy()

    # Shorten long default values
    config_df["default"] = [_shorten_default(x) for x in config_df["default"].values]

    return template.format(
        p=this_plugin,
        # ``context`` is not referenced by the template; passed as before
        context="",
        module=str(this_plugin.__module__).replace(".", "/"),
        svg=svg,
        data_type=this_data_type,
        columns=add_spaces(st.data_info(this_data_type).to_html(index=False)),
        kind=this_plugin.data_kind_for(this_data_type),
        docstring=this_plugin.__doc__ or "(no plugin description)",
        config_options=add_spaces(config_df.to_html(index=False)),
    )


def build_datastructure_doc():
    """Build a dependency tree for all plugins.

    For every suffix in ``tree_suffices`` one page is written to
    ``reference/datastructure{suffix}.rst`` next to this script. The page starts
    with ``page_header`` and contains one ``template`` section per data type,
    ordered by decreasing number of plugins needed to build the data type.
    Graphs are rendered into a temporary ``graphs{suffix}`` directory, which is
    removed again afterwards, also when building the page fails.

    Side effect: sets the pandas option ``display.max_colwidth`` to ``int(1e9)``.

    """
    pd.set_option("display.max_colwidth", int(1e9))

    st = get_context()

    # Too lazy to write proper graph sorter
    # Make dictionary {total number of dependencies below -> list of plugins}
    plugins_by_deps = get_plugins_deps(st)

    # Make graph for each suffix ('' referring to TPC)
    for suffix in tree_suffices:
        sections = [page_header.format(title=titles[suffix], context="xenonnt_online")]
        print(f"------------ {suffix} ------------")
        graph_dir = this_dir + f"/graphs{suffix}"
        os.makedirs(graph_dir, exist_ok=True)
        try:
            by_n_deps = plugins_by_deps[suffix]
            for n_deps in sorted(by_n_deps, reverse=True):
                for this_data_type in by_n_deps[n_deps]:
                    sections.append(_data_type_section(st, this_data_type, graph_dir))

            with open(this_dir + f"/reference/datastructure{suffix}.rst", mode="w") as f:
                f.write("".join(sections))
        finally:
            # Never leave the intermediate graph files behind
            shutil.rmtree(graph_dir)
def tree_to_svg(graph_tree, save_as="data_kinds"):
    """Render a graph to svg, return the svg text and delete the rendered files.

    :param graph_tree: graph object providing ``render`` (a graphviz graph in this file)
    :param save_as: path without extension that the graph is rendered to
    :return: lines of ``{save_as}.svg`` from the sixth line on, passed through ``add_spaces``

    """
    svg_path = f"{save_as}.svg"
    # Where to save this node
    graph_tree.render(save_as)
    with open(svg_path, mode="r") as svg_file:
        # The first five lines of the rendered file are dropped
        svg_lines = svg_file.readlines()[5:]
    svg = add_spaces(svg_lines)
    # Both the svg and the file called ``save_as`` itself are removed again
    for leftover in (svg_path, save_as):
        os.remove(leftover)
    return svg
def write_data_kind_dep_tree():
    """Write ``reference/data_kinds.rst``, the overview page of all data kinds.

    The page starts with ``data_kinds_header`` holding a graph in which every
    data kind points to the data kinds of its plugins' dependencies. It is
    followed by one section per data kind (the kind with most data types first,
    ties in alphabetical order), each listing the data types of that kind and
    showing them in a small graph.

    Each section is interpolated directly instead of re-running ``str.format``
    over the accumulated page, so braces in embedded svg text cannot break or
    alter the output.

    """
    print("------------ data kinds ------------")
    st = get_context()

    def get_plugin(pov):
        return st._get_plugins((pov,), "0")[pov]

    def kind_of(plugin, data_type):
        # A plugin's data_kind can be a mapping data type -> data kind
        kind = plugin.data_kind
        if isinstance(kind, (dict, immutabledict)):
            kind = kind[data_type]
        return kind

    def anchor(kind):
        # Link target of the section belonging to a data kind
        return "#" + kind.replace("_", "-") + "-data-kind"

    # tree: data kind -> set of data kinds it depends on
    # data_kinds: data kind -> list of data types of that kind
    tree = defaultdict(set)
    data_kinds = defaultdict(list)
    for data_type in st._plugin_class_registry.keys():
        this_plugin = get_plugin(data_type)
        depends_on = {
            kind_of(get_plugin(dep), dep) for dep in strax.to_str_tuple(this_plugin.depends_on)
        }
        this_data_kind = kind_of(this_plugin, data_type)
        for k in strax.to_str_tuple(this_data_kind):
            tree[k] |= depends_on
            data_kinds[this_data_kind].append(data_type)

    graph_tree = graphviz.Digraph(format="svg")
    graph_tree.attr(rankdir="RL")
    for data_kind in tree.keys():
        graph_tree.node(
            data_kind,
            style="filled",
            href=anchor(data_kind),
            fillcolor=kind_colors.get(data_kind, "grey"),
            shape="box3d",
        )
        for d in tree[data_kind]:
            graph_tree.edge(data_kind, d)

    svg = tree_to_svg(graph_tree, save_as="data_kinds")
    sections = [data_kinds_header.format(svg=svg)]

    # Sort by largest first
    ordered_kinds = sorted(data_kinds, key=lambda kind: (-len(data_kinds[kind]), kind))
    for data_kind in ordered_kinds:
        data_types = data_kinds[data_kind]
        kind_node = data_kind + "-data-kind"
        fillcolor = kind_colors.get(data_kind, "grey")

        graph_tree = graphviz.Graph(format="svg")
        graph_tree.attr(rankdir="LR")
        graph_tree.node(
            kind_node,
            style="filled",
            href=anchor(data_kind),
            fillcolor=fillcolor,
            shape="box3d",
        )
        for dtype in data_types:
            graph_tree.node(
                dtype,
                style="filled",
                href=anchor(data_kind),
                fillcolor=fillcolor,
            )
            graph_tree.edge(kind_node, dtype)

        kind_svg = tree_to_svg(graph_tree, save_as=f"{data_kind}_kind")
        bullet_list = "".join(f"\n - ``{d}``" for d in data_types)
        sections.append(f"""
{data_kind}-data kind
--------------------------------------------------------
The ``{data_kind}``-data kind includes the following data types:
{bullet_list}
.. raw:: html
{kind_svg}
""")

    out_path = this_dir + "/reference/data_kinds.rst"
    with open(out_path, mode="w") as f:
        f.write("".join(sections))
    # Sanity check kept from the original implementation
    assert os.path.exists(out_path)
# When run as a script: first write the data-kinds page, then the
# datastructure pages.
if __name__ == "__main__":
    write_data_kind_dep_tree()
    build_datastructure_doc()