From f1372793e490858b0e263877219e150359d5b6b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Mon, 5 Aug 2024 10:27:40 -0500 Subject: [PATCH 01/16] Add draft to save the start of downsampled waveforms --- strax/dtypes.py | 10 +++++++++- strax/processing/peak_building.py | 19 +++++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/strax/dtypes.py b/strax/dtypes.py index b30d5f856..520f4aaef 100644 --- a/strax/dtypes.py +++ b/strax/dtypes.py @@ -182,7 +182,7 @@ def hitlet_with_data_dtype(n_samples=2): def peak_dtype( - n_channels=100, n_sum_wv_samples=200, n_widths=11, digitize_top=True, hits_timing=True + n_channels=100, n_sum_wv_samples=200, n_widths=11, digitize_top=True, hits_timing=True, save_waveform_start = True ): """Data type for peaks - ranges across all channels in a detector Remember to set channel to -1 (todo: make enum) @@ -227,6 +227,14 @@ def peak_dtype( n_sum_wv_samples, ) dtype.insert(9, top_field) + + if save_waveform_start: + dtype += [( + ("Waveform data in PE/sample (not PE/ns!), first 110 sample", "data_start"), + np.float32, + 110, + )] + return dtype diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py index da5f45f7c..0334400f6 100644 --- a/strax/processing/peak_building.py +++ b/strax/processing/peak_building.py @@ -138,7 +138,7 @@ def find_peaks( @export @numba.jit(nopython=True, nogil=True, cache=True) def store_downsampled_waveform( - p, wv_buffer, store_in_data_top=False, wv_buffer_top=np.ones(1, dtype=np.float32) + p, wv_buffer, store_in_data_top=False, store_waveform_start=False, wv_buffer_top=np.ones(1, dtype=np.float32) ): """Downsample the waveform in buffer and store it in p['data'] and in p['data_top'] if indicated to do so. @@ -170,16 +170,23 @@ def store_downsampled_waveform( wv_buffer[: p["length"] * downsample_factor].reshape(-1, downsample_factor).sum(axis=1) ) p["dt"] *= downsample_factor + + # If the waveform is downsampled, we can store the first samples of the waveform + if store_waveform_start & (downsample_factor <= 6): + if p["length"] > len(p["data_start"]): + p["data_start"] = wv_buffer[: len(p["data_start"])] + else: + p["data_start"][: p["length"]] = wv_buffer[: p["length"]] + else: if store_in_data_top: p["data_top"][: p["length"]] = wv_buffer_top[: p["length"]] p["data"][: p["length"]] = wv_buffer[: p["length"]] - @export @numba.jit(nopython=True, nogil=True, cache=True) def sum_waveform( - peaks, hits, records, record_links, adc_to_pe, n_top_channels=0, select_peaks_indices=None + peaks, hits, records, record_links, adc_to_pe, n_top_channels=0, select_peaks_indices=None, save_waveform_start=False ): """Compute sum waveforms for all peaks in peaks. Only builds summed waveform other regions in which hits were found. This is required to avoid any bias due to zero-padding and baselining. 
@@ -307,10 +314,10 @@ def sum_waveform( p["area"] += area_pe if n_top_channels > 0: - store_downsampled_waveform(p, swv_buffer, True, twv_buffer) + store_downsampled_waveform(p, swv_buffer, True, save_waveform_start, twv_buffer) else: - store_downsampled_waveform(p, swv_buffer) - + store_downsampled_waveform(p, swv_buffer, False, save_waveform_start) + p["n_saturated_channels"] = p["saturated_channel"].sum() p["area_per_channel"][:] = area_per_channel From 92793e9d88824593aab831288c5dc90a8457a5a2 Mon Sep 17 00:00:00 2001 From: HenningSE Date: Wed, 7 Aug 2024 05:11:52 -0500 Subject: [PATCH 02/16] Bugfix --- strax/processing/peak_splitting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/strax/processing/peak_splitting.py b/strax/processing/peak_splitting.py index b04b0467e..54b51c100 100644 --- a/strax/processing/peak_splitting.py +++ b/strax/processing/peak_splitting.py @@ -15,6 +15,7 @@ def split_peaks( algorithm="local_minimum", data_type="peaks", n_top_channels=0, + save_waveform_start=False, **kwargs, ): """Return peaks split according to algorithm, with waveforms summed and widths computed. @@ -49,7 +50,7 @@ def split_peaks( if data_type_is_not_supported: raise TypeError(f'Data_type "{data_type}" is not supported.') return splitter( - peaks, hits, records, rlinks, to_pe, data_type, n_top_channels=n_top_channels, **kwargs + peaks, hits, records, rlinks, to_pe, data_type, n_top_channels=n_top_channels,save_waveform_start=save_waveform_start, **kwargs ) @@ -88,6 +89,7 @@ def __call__( do_iterations=1, min_area=0, n_top_channels=0, + save_waveform_start=False, **kwargs, ): if not len(records) or not len(peaks) or not do_iterations: @@ -127,7 +129,7 @@ def __call__( if is_split.sum() != 0: # Found new peaks: compute basic properties if data_type == "peaks": - strax.sum_waveform(new_peaks, hits, records, rlinks, to_pe, n_top_channels) + strax.sum_waveform(new_peaks, hits, records, rlinks, to_pe, n_top_channels, save_waveform_start=save_waveform_start) strax.compute_widths(new_peaks) elif data_type == "hitlets": # Add record fields here From 8680dc39c5025d69c7718a1b8d134e18ef1b03c9 Mon Sep 17 00:00:00 2001 From: HenningSE Date: Wed, 7 Aug 2024 07:19:57 -0500 Subject: [PATCH 03/16] Bugfix --- strax/processing/peak_merging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py index 3f04a3634..0f9bec811 100644 --- a/strax/processing/peak_merging.py +++ b/strax/processing/peak_merging.py @@ -85,7 +85,7 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)): # Downsample the buffers into new_p['data'], new_p['data_top'], # and new_p['data_bot'] - strax.store_downsampled_waveform(new_p, buffer, True, buffer_top) + strax.store_downsampled_waveform(new_p, buffer, True, True, buffer_top) new_p["n_saturated_channels"] = new_p["saturated_channel"].sum() From f82924ddd3d15108e327a4359ed2655c8c837112 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 08:44:18 +0000 Subject: [PATCH 04/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strax/dtypes.py | 21 ++++++++++++++------- strax/processing/peak_building.py | 18 +++++++++++++++--- strax/processing/peak_splitting.py | 20 ++++++++++++++++++-- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/strax/dtypes.py b/strax/dtypes.py index 520f4aaef..b16b8ec41 100644 --- 
a/strax/dtypes.py +++ b/strax/dtypes.py @@ -182,7 +182,12 @@ def hitlet_with_data_dtype(n_samples=2): def peak_dtype( - n_channels=100, n_sum_wv_samples=200, n_widths=11, digitize_top=True, hits_timing=True, save_waveform_start = True + n_channels=100, + n_sum_wv_samples=200, + n_widths=11, + digitize_top=True, + hits_timing=True, + save_waveform_start=True, ): """Data type for peaks - ranges across all channels in a detector Remember to set channel to -1 (todo: make enum) @@ -229,12 +234,14 @@ def peak_dtype( dtype.insert(9, top_field) if save_waveform_start: - dtype += [( - ("Waveform data in PE/sample (not PE/ns!), first 110 sample", "data_start"), - np.float32, - 110, - )] - + dtype += [ + ( + ("Waveform data in PE/sample (not PE/ns!), first 110 sample", "data_start"), + np.float32, + 110, + ) + ] + return dtype diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py index 0334400f6..89d95abcd 100644 --- a/strax/processing/peak_building.py +++ b/strax/processing/peak_building.py @@ -138,7 +138,11 @@ def find_peaks( @export @numba.jit(nopython=True, nogil=True, cache=True) def store_downsampled_waveform( - p, wv_buffer, store_in_data_top=False, store_waveform_start=False, wv_buffer_top=np.ones(1, dtype=np.float32) + p, + wv_buffer, + store_in_data_top=False, + store_waveform_start=False, + wv_buffer_top=np.ones(1, dtype=np.float32), ): """Downsample the waveform in buffer and store it in p['data'] and in p['data_top'] if indicated to do so. @@ -183,10 +187,18 @@ def store_downsampled_waveform( p["data_top"][: p["length"]] = wv_buffer_top[: p["length"]] p["data"][: p["length"]] = wv_buffer[: p["length"]] + @export @numba.jit(nopython=True, nogil=True, cache=True) def sum_waveform( - peaks, hits, records, record_links, adc_to_pe, n_top_channels=0, select_peaks_indices=None, save_waveform_start=False + peaks, + hits, + records, + record_links, + adc_to_pe, + n_top_channels=0, + select_peaks_indices=None, + save_waveform_start=False, ): """Compute sum waveforms for all peaks in peaks. Only builds summed waveform other regions in which hits were found. This is required to avoid any bias due to zero-padding and baselining. 
@@ -317,7 +329,7 @@ def sum_waveform( store_downsampled_waveform(p, swv_buffer, True, save_waveform_start, twv_buffer) else: store_downsampled_waveform(p, swv_buffer, False, save_waveform_start) - + p["n_saturated_channels"] = p["saturated_channel"].sum() p["area_per_channel"][:] = area_per_channel diff --git a/strax/processing/peak_splitting.py b/strax/processing/peak_splitting.py index 54b51c100..273afb99b 100644 --- a/strax/processing/peak_splitting.py +++ b/strax/processing/peak_splitting.py @@ -50,7 +50,15 @@ def split_peaks( if data_type_is_not_supported: raise TypeError(f'Data_type "{data_type}" is not supported.') return splitter( - peaks, hits, records, rlinks, to_pe, data_type, n_top_channels=n_top_channels,save_waveform_start=save_waveform_start, **kwargs + peaks, + hits, + records, + rlinks, + to_pe, + data_type, + n_top_channels=n_top_channels, + save_waveform_start=save_waveform_start, + **kwargs, ) @@ -129,7 +137,15 @@ def __call__( if is_split.sum() != 0: # Found new peaks: compute basic properties if data_type == "peaks": - strax.sum_waveform(new_peaks, hits, records, rlinks, to_pe, n_top_channels, save_waveform_start=save_waveform_start) + strax.sum_waveform( + new_peaks, + hits, + records, + rlinks, + to_pe, + n_top_channels, + save_waveform_start=save_waveform_start, + ) strax.compute_widths(new_peaks) elif data_type == "hitlets": # Add record fields here From 50e0b9873a9ceceeb1e276823e1fc46ca3470e68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Tue, 17 Sep 2024 09:26:45 +0200 Subject: [PATCH 05/16] Bugfixing the test_simple_summed_waveform test --- tests/test_peak_processing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_peak_processing.py b/tests/test_peak_processing.py index 2c2b1e5ce..09ba1a416 100644 --- a/tests/test_peak_processing.py +++ b/tests/test_peak_processing.py @@ -316,6 +316,7 @@ def test_simple_summed_waveform(pulses): fake_event_dtype = strax.time_dt_fields + [ ("data", np.float32, 200), ("data_top", np.float32, 200), + ("data_start", np.float32, 110), ] records = np.zeros(len(pulses), dtype=strax.record_dtype()) From 30f5191872ced85c60d833a7329f691aa3fd45ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Tue, 17 Sep 2024 09:37:28 +0200 Subject: [PATCH 06/16] take waveform length from n_sum_wv_samples --- strax/dtypes.py | 4 ++-- tests/test_peak_processing.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/strax/dtypes.py b/strax/dtypes.py index d0d4cee3e..26d5a0aa2 100644 --- a/strax/dtypes.py +++ b/strax/dtypes.py @@ -215,9 +215,9 @@ def peak_dtype( if save_waveform_start: dtype += [ ( - ("Waveform data in PE/sample (not PE/ns!), first 110 sample", "data_start"), + ("Waveform data in PE/sample (not PE/ns!), first 200 not downsampled samples", "data_start"), np.float32, - 110, + n_sum_wv_samples, ) ] diff --git a/tests/test_peak_processing.py b/tests/test_peak_processing.py index 09ba1a416..a0fde4b43 100644 --- a/tests/test_peak_processing.py +++ b/tests/test_peak_processing.py @@ -316,7 +316,7 @@ def test_simple_summed_waveform(pulses): fake_event_dtype = strax.time_dt_fields + [ ("data", np.float32, 200), ("data_top", np.float32, 200), - ("data_start", np.float32, 110), + ("data_start", np.float32, 200), ] records = np.zeros(len(pulses), dtype=strax.record_dtype()) From fd5aa591a6287ff0f991a577a37ef901d7ff2ada Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Sep 2024 
07:37:44 +0000 Subject: [PATCH 07/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strax/dtypes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/strax/dtypes.py b/strax/dtypes.py index 26d5a0aa2..b66644469 100644 --- a/strax/dtypes.py +++ b/strax/dtypes.py @@ -215,7 +215,10 @@ def peak_dtype( if save_waveform_start: dtype += [ ( - ("Waveform data in PE/sample (not PE/ns!), first 200 not downsampled samples", "data_start"), + ( + "Waveform data in PE/sample (not PE/ns!), first 200 not downsampled samples", + "data_start", + ), np.float32, n_sum_wv_samples, ) From 7f4000ba091afe3ac41ba2a3ae4c571a1d261778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Wed, 9 Oct 2024 15:07:16 +0200 Subject: [PATCH 08/16] Update docstring of store_downsampled_waveform --- strax/processing/peak_building.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py index 715912fc5..ce0426777 100644 --- a/strax/processing/peak_building.py +++ b/strax/processing/peak_building.py @@ -154,7 +154,8 @@ def store_downsampled_waveform( :param store_in_data_top: Boolean which indicates whether to also store into p['data_top'] When downsampling results in a fractional number of samples, the peak is shortened rather than extended. This causes data loss, but it is necessary to prevent overlaps between peaks. - + :param store_waveform_start: Boolean which indicates whether to store the first samples of the + waveform in the peak. It will only store the first samples if the waveform is downsampled and the downsample factor is smaller equal to 6. It should cover basically all S1s while keeping the disk usage low. """ n_samples = len(p["data"]) From bc6ff947bd5646cb8abfb48d73e7f7d357b0bd65 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:07:35 +0000 Subject: [PATCH 09/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strax/processing/peak_building.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py index ce0426777..7a379e3b0 100644 --- a/strax/processing/peak_building.py +++ b/strax/processing/peak_building.py @@ -155,7 +155,10 @@ def store_downsampled_waveform( downsampling results in a fractional number of samples, the peak is shortened rather than extended. This causes data loss, but it is necessary to prevent overlaps between peaks. :param store_waveform_start: Boolean which indicates whether to store the first samples of the - waveform in the peak. It will only store the first samples if the waveform is downsampled and the downsample factor is smaller equal to 6. It should cover basically all S1s while keeping the disk usage low. + waveform in the peak. It will only store the first samples if the waveform is downsampled + and the downsample factor is smaller equal to 6. It should cover basically all S1s while + keeping the disk usage low. 
+ """ n_samples = len(p["data"]) From ae4143775932cdb38faa386412e375d4c6e316c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Thu, 10 Oct 2024 11:04:37 +0200 Subject: [PATCH 10/16] Make max downsample factor a function parameter --- strax/processing/peak_building.py | 20 ++++++++++++++------ strax/processing/peak_merging.py | 2 +- strax/processing/peak_splitting.py | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py index 7a379e3b0..34fe1fe06 100644 --- a/strax/processing/peak_building.py +++ b/strax/processing/peak_building.py @@ -142,6 +142,7 @@ def store_downsampled_waveform( wv_buffer, store_in_data_top=False, store_waveform_start=False, + max_downsample_factor_waveform_start=None, wv_buffer_top=np.ones(1, dtype=np.float32), ): """Downsample the waveform in buffer and store it in p['data'] and in p['data_top'] if indicated @@ -156,9 +157,9 @@ def store_downsampled_waveform( extended. This causes data loss, but it is necessary to prevent overlaps between peaks. :param store_waveform_start: Boolean which indicates whether to store the first samples of the waveform in the peak. It will only store the first samples if the waveform is downsampled - and the downsample factor is smaller equal to 6. It should cover basically all S1s while - keeping the disk usage low. - + and the downsample factor is smaller equal to max_downsample_factor_waveform_start. + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + samples of the waveform. It should cover basically all S1s while keeping the disk usage low. """ n_samples = len(p["data"]) @@ -180,7 +181,7 @@ def store_downsampled_waveform( p["dt"] *= downsample_factor # If the waveform is downsampled, we can store the first samples of the waveform - if store_waveform_start & (downsample_factor <= 6): + if store_waveform_start & (downsample_factor <= max_downsample_factor_waveform_start): if p["length"] > len(p["data_start"]): p["data_start"] = wv_buffer[: len(p["data_start"])] else: @@ -253,6 +254,8 @@ def sum_waveform( n_top_channels=0, select_peaks_indices=None, save_waveform_start=False, + max_downsample_factor_waveform_start=None, + ): """Compute sum waveforms for all peaks in peaks. Only builds summed waveform other regions in which hits were found. This is required to avoid any bias due to zero-padding and baselining. @@ -266,6 +269,11 @@ def sum_waveform( :param select_peaks_indices: Indices of the peaks for partial processing. In the form of np.array([np.int, np.int, ..]). If None (default), all the peaks are used for the summation. Assumes all peaks AND pulses have the same dt! + :param save_waveform_start: Boolean which indicates whether to store the first samples of the + waveform in the peak. It will only store the first samples if the waveform is downsampled + and the downsample factor is smaller equal to max_downsample_factor_waveform_start. + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + samples of the waveform. It should cover basically all S1s while keeping the disk usage low. 
""" if not len(records): @@ -380,9 +388,9 @@ def sum_waveform( p["area"] += area_pe if n_top_channels > 0: - store_downsampled_waveform(p, swv_buffer, True, save_waveform_start, twv_buffer) + store_downsampled_waveform(p, swv_buffer, True, save_waveform_start, max_downsample_factor_waveform_start, twv_buffer) else: - store_downsampled_waveform(p, swv_buffer, False, save_waveform_start) + store_downsampled_waveform(p, swv_buffer, False, save_waveform_start, max_downsample_factor_waveform_start) p["n_saturated_channels"] = p["saturated_channel"].sum() p["area_per_channel"][:] = area_per_channel diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py index 0f9bec811..97421893b 100644 --- a/strax/processing/peak_merging.py +++ b/strax/processing/peak_merging.py @@ -85,7 +85,7 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)): # Downsample the buffers into new_p['data'], new_p['data_top'], # and new_p['data_bot'] - strax.store_downsampled_waveform(new_p, buffer, True, True, buffer_top) + strax.store_downsampled_waveform(new_p, buffer, True, True, 6, buffer_top) new_p["n_saturated_channels"] = new_p["saturated_channel"].sum() diff --git a/strax/processing/peak_splitting.py b/strax/processing/peak_splitting.py index 273afb99b..f32fb8873 100644 --- a/strax/processing/peak_splitting.py +++ b/strax/processing/peak_splitting.py @@ -16,6 +16,7 @@ def split_peaks( data_type="peaks", n_top_channels=0, save_waveform_start=False, + max_downsample_factor_waveform_start=None, **kwargs, ): """Return peaks split according to algorithm, with waveforms summed and widths computed. @@ -38,6 +39,11 @@ def split_peaks( the new split peaks/hitlets. :param n_top_channels: Number of top array channels. :param result_dtype: dtype of the result. + :param save_waveform_start: Boolean which indicates whether to store the first samples of the + waveform in the peak. It will only store the first samples if the waveform is downsampled + and the downsample factor is smaller equal to max_downsample_factor_waveform_start. + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + samples of the waveform. It should cover basically all S1s while keeping the disk usage low. Any other options are passed to the algorithm. @@ -58,6 +64,7 @@ def split_peaks( data_type, n_top_channels=n_top_channels, save_waveform_start=save_waveform_start, + max_downsample_factor_waveform_star=max_downsample_factor_waveform_start, **kwargs, ) @@ -81,6 +88,11 @@ class PeakSplitter: implemented in each subclass defines the algorithm, which takes in a peak's waveform and returns the index to split the peak at, if a split point is found. Otherwise NO_MORE_SPLITS is returned and the peak is left as is. + :param save_waveform_start: Boolean which indicates whether to store the first samples of the + waveform in the peak. It will only store the first samples if the waveform is downsampled + and the downsample factor is smaller equal to max_downsample_factor_waveform_start. + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + samples of the waveform. It should cover basically all S1s while keeping the disk usage low. 
""" @@ -98,6 +110,7 @@ def __call__( min_area=0, n_top_channels=0, save_waveform_start=False, + max_downsample_factor_waveform_start=None, **kwargs, ): if not len(records) or not len(peaks) or not do_iterations: @@ -145,6 +158,7 @@ def __call__( to_pe, n_top_channels, save_waveform_start=save_waveform_start, + max_downsample_factor_waveform_start=max_downsample_factor_waveform_start, ) strax.compute_widths(new_peaks) elif data_type == "hitlets": From b3da65e42ac1d5a51b12ed7874b003a89a0e0790 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:05:23 +0000 Subject: [PATCH 11/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strax/processing/peak_building.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py index 34fe1fe06..95608ca83 100644 --- a/strax/processing/peak_building.py +++ b/strax/processing/peak_building.py @@ -158,8 +158,9 @@ def store_downsampled_waveform( :param store_waveform_start: Boolean which indicates whether to store the first samples of the waveform in the peak. It will only store the first samples if the waveform is downsampled and the downsample factor is smaller equal to max_downsample_factor_waveform_start. - :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first samples of the waveform. It should cover basically all S1s while keeping the disk usage low. + """ n_samples = len(p["data"]) @@ -255,7 +256,6 @@ def sum_waveform( select_peaks_indices=None, save_waveform_start=False, max_downsample_factor_waveform_start=None, - ): """Compute sum waveforms for all peaks in peaks. Only builds summed waveform other regions in which hits were found. This is required to avoid any bias due to zero-padding and baselining. 
@@ -388,9 +388,18 @@ def sum_waveform( p["area"] += area_pe if n_top_channels > 0: - store_downsampled_waveform(p, swv_buffer, True, save_waveform_start, max_downsample_factor_waveform_start, twv_buffer) + store_downsampled_waveform( + p, + swv_buffer, + True, + save_waveform_start, + max_downsample_factor_waveform_start, + twv_buffer, + ) else: - store_downsampled_waveform(p, swv_buffer, False, save_waveform_start, max_downsample_factor_waveform_start) + store_downsampled_waveform( + p, swv_buffer, False, save_waveform_start, max_downsample_factor_waveform_start + ) p["n_saturated_channels"] = p["saturated_channel"].sum() p["area_per_channel"][:] = area_per_channel From 990834b48d509ecc3f88d74a7c7491c58ac3780f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Thu, 10 Oct 2024 12:30:02 +0200 Subject: [PATCH 12/16] Bugfix --- strax/processing/peak_splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strax/processing/peak_splitting.py b/strax/processing/peak_splitting.py index f32fb8873..4fbb7cb89 100644 --- a/strax/processing/peak_splitting.py +++ b/strax/processing/peak_splitting.py @@ -64,7 +64,7 @@ def split_peaks( data_type, n_top_channels=n_top_channels, save_waveform_start=save_waveform_start, - max_downsample_factor_waveform_star=max_downsample_factor_waveform_start, + max_downsample_factor_waveform_start=max_downsample_factor_waveform_start, **kwargs, ) From 45f1726116af85276a2eddc6b5cf2883930bdd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Thu, 10 Oct 2024 13:17:17 +0200 Subject: [PATCH 13/16] Make numba happy and set a integer as default value --- strax/processing/peak_building.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py index 95608ca83..d880beeae 100644 --- a/strax/processing/peak_building.py +++ b/strax/processing/peak_building.py @@ -142,7 +142,7 @@ def store_downsampled_waveform( wv_buffer, store_in_data_top=False, store_waveform_start=False, - max_downsample_factor_waveform_start=None, + max_downsample_factor_waveform_start=2, wv_buffer_top=np.ones(1, dtype=np.float32), ): """Downsample the waveform in buffer and store it in p['data'] and in p['data_top'] if indicated @@ -255,7 +255,7 @@ def sum_waveform( n_top_channels=0, select_peaks_indices=None, save_waveform_start=False, - max_downsample_factor_waveform_start=None, + max_downsample_factor_waveform_start=2, ): """Compute sum waveforms for all peaks in peaks. Only builds summed waveform other regions in which hits were found. This is required to avoid any bias due to zero-padding and baselining. From 30e5b92eee5a5870c3a54aaac315df345ab4b6b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Thu, 10 Oct 2024 13:41:54 +0200 Subject: [PATCH 14/16] Bugfix --- strax/processing/peak_splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strax/processing/peak_splitting.py b/strax/processing/peak_splitting.py index 4fbb7cb89..8ba2a7cfa 100644 --- a/strax/processing/peak_splitting.py +++ b/strax/processing/peak_splitting.py @@ -16,7 +16,7 @@ def split_peaks( data_type="peaks", n_top_channels=0, save_waveform_start=False, - max_downsample_factor_waveform_start=None, + max_downsample_factor_waveform_start=2, **kwargs, ): """Return peaks split according to algorithm, with waveforms summed and widths computed. 
From 5be14aecb3491c04b4b618d6a097e7a5f20b1b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henning=20Schulze=20Ei=C3=9Fing?= Date: Fri, 11 Oct 2024 08:57:46 +0200 Subject: [PATCH 15/16] Make waveform start optional in merge_peaks --- strax/processing/peak_merging.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py index 97421893b..72226425a 100644 --- a/strax/processing/peak_merging.py +++ b/strax/processing/peak_merging.py @@ -8,7 +8,8 @@ @export -def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)): +def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5), save_waveform_start=False, + max_downsample_factor_waveform_start=2): """Merge specified peaks with their neighbors, return merged peaks. :param peaks: Record array of strax peak dtype. @@ -17,6 +18,11 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)): :param max_buffer: Maximum number of samples in the sum_waveforms and other waveforms of the resulting peaks (after merging). Peaks must be constructed based on the properties of constituent peaks, it being too time-consuming to revert to records/hits. + :param save_waveform_start: Boolean which indicates whether to store the first samples of the + waveform in the peak. It will only store the first samples if the waveform is downsampled + and the downsample factor is smaller equal to max_downsample_factor_waveform_start. + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + samples of the waveform. It should cover basically all S1s while keeping the disk usage low. """ assert len(start_merge_at) == len(end_merge_at) @@ -85,7 +91,7 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)): # Downsample the buffers into new_p['data'], new_p['data_top'], # and new_p['data_bot'] - strax.store_downsampled_waveform(new_p, buffer, True, True, 6, buffer_top) + strax.store_downsampled_waveform(new_p, buffer, True, save_waveform_start, max_downsample_factor_waveform_start, buffer_top) new_p["n_saturated_channels"] = new_p["saturated_channel"].sum() From e9bb7e8ac319d9c56bdc6f4e593e967764ef93ac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 11 Oct 2024 06:58:03 +0000 Subject: [PATCH 16/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strax/processing/peak_merging.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/strax/processing/peak_merging.py b/strax/processing/peak_merging.py index 72226425a..94e785568 100644 --- a/strax/processing/peak_merging.py +++ b/strax/processing/peak_merging.py @@ -8,8 +8,14 @@ @export -def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5), save_waveform_start=False, - max_downsample_factor_waveform_start=2): +def merge_peaks( + peaks, + start_merge_at, + end_merge_at, + max_buffer=int(1e5), + save_waveform_start=False, + max_downsample_factor_waveform_start=2, +): """Merge specified peaks with their neighbors, return merged peaks. :param peaks: Record array of strax peak dtype. 
@@ -91,7 +97,14 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5), save_w # Downsample the buffers into new_p['data'], new_p['data_top'], # and new_p['data_bot'] - strax.store_downsampled_waveform(new_p, buffer, True, save_waveform_start, max_downsample_factor_waveform_start, buffer_top) + strax.store_downsampled_waveform( + new_p, + buffer, + True, + save_waveform_start, + max_downsample_factor_waveform_start, + buffer_top, + ) new_p["n_saturated_channels"] = new_p["saturated_channel"].sum()
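Taken together, the series adds an optional data_start field to strax.peak_dtype and threads two new switches, save_waveform_start and max_downsample_factor_waveform_start, through store_downsampled_waveform, sum_waveform, split_peaks and merge_peaks, so the first, un-downsampled samples of a peak's sum waveform are kept alongside the downsampled one whenever the downsample factor stays at or below the configured maximum. The sketch below is not part of the patches: it is an untested illustration that assumes a strax checkout with the full series applied, uses only the signatures visible in the diffs above, and assumes the numba-compiled helper can be called on a single peak record from pure Python the same way sum_waveform calls it internally.

    import numpy as np
    import strax

    # The peak dtype now carries a 'data_start' field with n_sum_wv_samples entries.
    peaks = np.zeros(1, dtype=strax.peak_dtype(n_channels=10, save_waveform_start=True))
    p = peaks[0]

    # Fake a 300-sample sum waveform; 'data' holds 200 samples, so the waveform
    # has to be downsampled by a factor of ceil(300 / 200) = 2.
    p["length"] = 300
    p["dt"] = 10  # ns per sample before downsampling
    wv_buffer = np.random.rand(300).astype(np.float32)

    # Factor 2 does not exceed max_downsample_factor_waveform_start, so the first
    # raw samples are stored in 'data_start' in addition to the downsampled 'data'.
    strax.store_downsampled_waveform(
        p,
        wv_buffer,
        store_in_data_top=False,
        store_waveform_start=True,
        max_downsample_factor_waveform_start=2,
        wv_buffer_top=np.zeros(1, dtype=np.float32),
    )

    print(p["dt"])              # 20: dt doubled by the downsampling
    print(p["data"][:3])        # downsampled (pairwise-summed) sum waveform
    print(p["data_start"][:3])  # first samples of the original, un-downsampled buffer

In the higher-level path nothing else changes: split_peaks and merge_peaks simply forward the two options to sum_waveform / store_downsampled_waveform, so a plugin only needs to build its dtype with save_waveform_start=True and pass matching flags when it (re)builds peak waveforms. With the default factor cut of 2 (set in PATCH 13/16), data_start is filled only for short, lightly downsampled peaks, which per the docstring should cover basically all S1s while keeping disk usage low.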