Add option to save first samples of peak(lets) waveform (#867)
* Add draft to save the start of downsampled waveforms

* Bugfix

* Bugfix

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Bugfixing the test_simple_summed_waveform test

* take waveform length from n_sum_wv_samples

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update docstring of store_downsampled_waveform

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Make max downsample factor a function parameter

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Bugfix

* Make numba happy and set an integer as the default value

* Bugfix

* Make waveform start optional in merge_peaks

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Dacheng Xu <dx2227@columbia.edu>
Co-authored-by: Yue Ma <3124558229@qq.com>
4 people authored Oct 11, 2024
1 parent 5360ab3 commit a528e4b
Showing 5 changed files with 119 additions and 9 deletions.
20 changes: 19 additions & 1 deletion strax/dtypes.py
@@ -161,7 +161,12 @@ def hitlet_with_data_dtype(n_samples=2):
 
 
 def peak_dtype(
-    n_channels=100, n_sum_wv_samples=200, n_widths=11, digitize_top=True, hits_timing=True
+    n_channels=100,
+    n_sum_wv_samples=200,
+    n_widths=11,
+    digitize_top=True,
+    hits_timing=True,
+    save_waveform_start=True,
 ):
     """Data type for peaks - ranges across all channels in a detector
     Remember to set channel to -1 (todo: make enum)
@@ -206,6 +211,19 @@ def peak_dtype(
             n_sum_wv_samples,
         )
         dtype.insert(9, top_field)
 
+    if save_waveform_start:
+        dtype += [
+            (
+                (
+                    "Waveform data in PE/sample (not PE/ns!), first 200 not downsampled samples",
+                    "data_start",
+                ),
+                np.float32,
+                n_sum_wv_samples,
+            )
+        ]
+
     return dtype
 
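A minimal sketch of the resulting dtype (assuming a strax build that includes this change; the channel and sample counts are arbitrary):

    import numpy as np
    import strax

    # With save_waveform_start=True (the default), the peak dtype gains a
    # "data_start" field holding the first n_sum_wv_samples samples.
    dt_with = np.dtype(strax.peak_dtype(n_channels=10, n_sum_wv_samples=200))
    dt_without = np.dtype(strax.peak_dtype(n_channels=10, save_waveform_start=False))

    assert "data_start" in dt_with.names
    assert dt_with["data_start"].shape == (200,)
    assert "data_start" not in dt_without.names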
48 changes: 44 additions & 4 deletions strax/processing/peak_building.py
@@ -138,7 +138,12 @@ def find_peaks(
 @export
 @numba.jit(nopython=True, nogil=True, cache=True)
 def store_downsampled_waveform(
-    p, wv_buffer, store_in_data_top=False, wv_buffer_top=np.ones(1, dtype=np.float32)
+    p,
+    wv_buffer,
+    store_in_data_top=False,
+    store_waveform_start=False,
+    max_downsample_factor_waveform_start=2,
+    wv_buffer_top=np.ones(1, dtype=np.float32),
 ):
     """Downsample the waveform in buffer and store it in p['data'] and in p['data_top'] if indicated
     to do so.
@@ -150,6 +155,11 @@ def store_downsampled_waveform(
     :param store_in_data_top: Boolean which indicates whether to also store into p['data_top']. When
         downsampling results in a fractional number of samples, the peak is shortened rather than
         extended. This causes data loss, but it is necessary to prevent overlaps between peaks.
+    :param store_waveform_start: Boolean which indicates whether to store the first samples of the
+        waveform in the peak. The first samples are stored only if the waveform is downsampled and
+        the downsample factor is smaller than or equal to max_downsample_factor_waveform_start.
+    :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first
+        samples of the waveform. It should cover essentially all S1s while keeping the disk usage low.
     """
 
@@ -170,6 +180,14 @@ def store_downsampled_waveform(
             wv_buffer[: p["length"] * downsample_factor].reshape(-1, downsample_factor).sum(axis=1)
         )
         p["dt"] *= downsample_factor
+
+        # If the waveform is downsampled, we can store the first samples of the waveform
+        if store_waveform_start & (downsample_factor <= max_downsample_factor_waveform_start):
+            if p["length"] > len(p["data_start"]):
+                p["data_start"] = wv_buffer[: len(p["data_start"])]
+            else:
+                p["data_start"][: p["length"]] = wv_buffer[: p["length"]]
+
     else:
         if store_in_data_top:
             p["data_top"][: p["length"]] = wv_buffer_top[: p["length"]]
@@ -229,7 +247,15 @@ def _simple_summed_waveform(records, containers, touching_windows, to_pe):
 @export
 @numba.jit(nopython=True, nogil=True, cache=True)
 def sum_waveform(
-    peaks, hits, records, record_links, adc_to_pe, n_top_channels=0, select_peaks_indices=None
+    peaks,
+    hits,
+    records,
+    record_links,
+    adc_to_pe,
+    n_top_channels=0,
+    select_peaks_indices=None,
+    save_waveform_start=False,
+    max_downsample_factor_waveform_start=2,
 ):
     """Compute sum waveforms for all peaks in peaks. Only builds the summed waveform over regions
     in which hits were found. This is required to avoid any bias due to zero-padding and baselining.
@@ -243,6 +269,11 @@ def sum_waveform(
     :param select_peaks_indices: Indices of the peaks for partial processing. In the form of
         np.array([np.int, np.int, ..]). If None (default), all the peaks are used for the summation.
         Assumes all peaks AND pulses have the same dt!
+    :param save_waveform_start: Boolean which indicates whether to store the first samples of the
+        waveform in the peak. The first samples are stored only if the waveform is downsampled and
+        the downsample factor is smaller than or equal to max_downsample_factor_waveform_start.
+    :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first
+        samples of the waveform. It should cover essentially all S1s while keeping the disk usage low.
     """
     if not len(records):
@@ -357,9 +388,18 @@ def sum_waveform(
             p["area"] += area_pe
 
             if n_top_channels > 0:
-                store_downsampled_waveform(p, swv_buffer, True, twv_buffer)
+                store_downsampled_waveform(
+                    p,
+                    swv_buffer,
+                    True,
+                    save_waveform_start,
+                    max_downsample_factor_waveform_start,
+                    twv_buffer,
+                )
             else:
-                store_downsampled_waveform(p, swv_buffer)
+                store_downsampled_waveform(
+                    p, swv_buffer, False, save_waveform_start, max_downsample_factor_waveform_start
+                )
 
             p["n_saturated_channels"] = p["saturated_channel"].sum()
             p["area_per_channel"][:] = area_per_channel
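An end-to-end sketch of the new behaviour (it assumes a strax build with this change; the numbers are arbitrary): a 400-sample waveform must be downsampled by a factor of 2 to fit the 200-sample data field, so data_start keeps the first raw samples.

    import numpy as np
    import strax

    p = np.zeros(1, dtype=strax.peak_dtype(n_channels=1, n_sum_wv_samples=200))[0]
    p["length"] = 400  # raw waveform is longer than the 200-sample data field
    p["dt"] = 10
    wv_buffer = np.ones(400, dtype=np.float32)

    # Positional args as in the diff above: store_in_data_top=False,
    # store_waveform_start=True, max_downsample_factor_waveform_start=2.
    strax.store_downsampled_waveform(p, wv_buffer, False, True, 2)

    assert p["dt"] == 20              # dt grew by the downsample factor
    assert p["data"][0] == 2.0        # pairs of raw samples were summed
    assert p["data_start"][0] == 1.0  # raw head preserved, not downsampled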
23 changes: 21 additions & 2 deletions strax/processing/peak_merging.py
@@ -8,7 +8,14 @@
 
 
 @export
-def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)):
+def merge_peaks(
+    peaks,
+    start_merge_at,
+    end_merge_at,
+    max_buffer=int(1e5),
+    save_waveform_start=False,
+    max_downsample_factor_waveform_start=2,
+):
     """Merge specified peaks with their neighbors, return merged peaks.
 
     :param peaks: Record array of strax peak dtype.
@@ -17,6 +24,11 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)):
     :param max_buffer: Maximum number of samples in the sum_waveforms and other waveforms of the
         resulting peaks (after merging). Peaks must be constructed based on the properties of
         constituent peaks, it being too time-consuming to revert to records/hits.
+    :param save_waveform_start: Boolean which indicates whether to store the first samples of the
+        waveform in the peak. The first samples are stored only if the waveform is downsampled and
+        the downsample factor is smaller than or equal to max_downsample_factor_waveform_start.
+    :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first
+        samples of the waveform. It should cover essentially all S1s while keeping the disk usage low.
     """
     assert len(start_merge_at) == len(end_merge_at)
@@ -85,7 +97,14 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)):
 
         # Downsample the buffers into new_p['data'], new_p['data_top'],
         # and new_p['data_bot']
-        strax.store_downsampled_waveform(new_p, buffer, True, buffer_top)
+        strax.store_downsampled_waveform(
+            new_p,
+            buffer,
+            True,
+            save_waveform_start,
+            max_downsample_factor_waveform_start,
+            buffer_top,
+        )
 
         new_p["n_saturated_channels"] = new_p["saturated_channel"].sum()
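For orientation, a pure-Python sketch of the merge bookkeeping only; it assumes (matching the strax source) that each (start, end) pair selects the half-open slice peaks[start:end] to merge:

    import numpy as np

    start_merge_at = np.array([0, 5])
    end_merge_at = np.array([3, 7])

    for start, end in zip(start_merge_at, end_merge_at):
        # peaks[start:end] is combined into one merged peak; its summed waveform
        # is then downsampled, and with save_waveform_start=True the head of the
        # buffer is kept in data_start (subject to the max downsample factor).
        print(f"would merge peaks[{start}:{end}] into one peak")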
36 changes: 34 additions & 2 deletions strax/processing/peak_splitting.py
@@ -15,6 +15,8 @@ def split_peaks(
     algorithm="local_minimum",
     data_type="peaks",
     n_top_channels=0,
+    save_waveform_start=False,
+    max_downsample_factor_waveform_start=2,
     **kwargs,
 ):
     """Return peaks split according to algorithm, with waveforms summed and widths computed.
@@ -37,6 +39,11 @@ def split_peaks(
         the new split peaks/hitlets.
     :param n_top_channels: Number of top array channels.
     :param result_dtype: dtype of the result.
+    :param save_waveform_start: Boolean which indicates whether to store the first samples of the
+        waveform in the peak. The first samples are stored only if the waveform is downsampled and
+        the downsample factor is smaller than or equal to max_downsample_factor_waveform_start.
+    :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first
+        samples of the waveform. It should cover essentially all S1s while keeping the disk usage low.
 
     Any other options are passed to the algorithm.
 
@@ -49,7 +56,16 @@ def split_peaks(
     if data_type_is_not_supported:
         raise TypeError(f'Data_type "{data_type}" is not supported.')
     return splitter(
-        peaks, hits, records, rlinks, to_pe, data_type, n_top_channels=n_top_channels, **kwargs
+        peaks,
+        hits,
+        records,
+        rlinks,
+        to_pe,
+        data_type,
+        n_top_channels=n_top_channels,
+        save_waveform_start=save_waveform_start,
+        max_downsample_factor_waveform_start=max_downsample_factor_waveform_start,
+        **kwargs,
     )
 
 
@@ -72,6 +88,11 @@ class PeakSplitter:
         implemented in each subclass defines the algorithm, which takes in a peak's waveform and
         returns the index to split the peak at, if a split point is found. Otherwise NO_MORE_SPLITS
         is returned and the peak is left as is.
+    :param save_waveform_start: Boolean which indicates whether to store the first samples of the
+        waveform in the peak. The first samples are stored only if the waveform is downsampled and
+        the downsample factor is smaller than or equal to max_downsample_factor_waveform_start.
+    :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first
+        samples of the waveform. It should cover essentially all S1s while keeping the disk usage low.
     """
 
@@ -88,6 +109,8 @@ def __call__(
         do_iterations=1,
         min_area=0,
         n_top_channels=0,
+        save_waveform_start=False,
+        max_downsample_factor_waveform_start=None,
         **kwargs,
     ):
         if not len(records) or not len(peaks) or not do_iterations:
@@ -127,7 +150,16 @@ def __call__(
         if is_split.sum() != 0:
             # Found new peaks: compute basic properties
             if data_type == "peaks":
-                strax.sum_waveform(new_peaks, hits, records, rlinks, to_pe, n_top_channels)
+                strax.sum_waveform(
+                    new_peaks,
+                    hits,
+                    records,
+                    rlinks,
+                    to_pe,
+                    n_top_channels,
+                    save_waveform_start=save_waveform_start,
+                    max_downsample_factor_waveform_start=max_downsample_factor_waveform_start,
+                )
                 strax.compute_widths(new_peaks)
            elif data_type == "hitlets":
                 # Add record fields here
1 change: 1 addition & 0 deletions tests/test_peak_processing.py
@@ -316,6 +316,7 @@ def test_simple_summed_waveform(pulses):
     fake_event_dtype = strax.time_dt_fields + [
         ("data", np.float32, 200),
         ("data_top", np.float32, 200),
+        ("data_start", np.float32, 200),
     ]
 
     records = np.zeros(len(pulses), dtype=strax.record_dtype())
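A quick sketch of why the test dtype grows (assuming strax.time_dt_fields as used in the test): events built from this dtype now carry a per-event data_start array alongside data and data_top.

    import numpy as np
    import strax

    fake_event_dtype = strax.time_dt_fields + [
        ("data", np.float32, 200),
        ("data_top", np.float32, 200),
        ("data_start", np.float32, 200),
    ]
    events = np.zeros(2, dtype=fake_event_dtype)
    assert events["data_start"].shape == (2, 200)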
