Skip to content

Commit

Permalink
replace all np.int with np.int32 (#305)
Browse files Browse the repository at this point in the history
- use `dtype=numpy.int32` for all NumPy int arrays
  • Loading branch information
jianfch committed Jan 28, 2024
1 parent e8f72a3 commit 3886bc6
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion stable_whisper/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.15.1"
__version__ = "2.15.2"
8 changes: 4 additions & 4 deletions stable_whisper/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ def refine(
def ts_to_frames(timestamps: Union[np.ndarray, list]) -> np.ndarray:
if isinstance(timestamps, list):
timestamps = np.array(timestamps)
return (timestamps * FRAMES_PER_SECOND).round().astype(int)
return (timestamps * FRAMES_PER_SECOND).round().astype(np.int32)

def curr_segments():
all_words = result.all_words()
Expand Down Expand Up @@ -753,8 +753,8 @@ def _refine(_step: str):
min_starts = ts_to_frames(np.array(min_starts) - time_offset)
max_ends = ts_to_frames(np.array(max_ends) - time_offset)

mid_starts = min_starts + ((max_starts - min_starts) / 2).round().astype(int)
mid_ends = min_ends + ((max_ends - min_ends) / 2).round().astype(int)
mid_starts = min_starts + ((max_starts - min_starts) / 2).round().astype(np.int32)
mid_ends = min_ends + ((max_ends - min_ends) / 2).round().astype(np.int32)

text_tokens = [t for w in words for t in w.tokens if t < tokenizer.eot]
word_tokens = [[t for t in w.tokens if t < tokenizer.eot] for w in words]
Expand Down Expand Up @@ -854,7 +854,7 @@ def update_ts():
_p = 0 if idx == 0 else mid_starts[idx-1]
mel_segment[row, :, _p:_i] = 0
orig_probs, orig_tk_poss = get_prob()
changes = np.zeros((orig_probs.shape[-1], 3), dtype=int)
changes = np.zeros((orig_probs.shape[-1], 3), dtype=np.int32)
changes[:, -1] = -1
frame_indices = (mid_ends, max_starts) if is_end_ts else (min_ends, mid_starts)
for idx, (_s, _e) in enumerate(zip(*frame_indices)):
Expand Down
4 changes: 2 additions & 2 deletions stable_whisper/stabilization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ def _vad_probs(
if self._default_probs:
assert offset is not None, 'offset is required for default probs'
sample_offset = offset * self.sampling_rate
s = np.floor(sample_offset / self.vad_window).astype(np.int)
e = np.ceil((sample_offset + audio.shape[-1]) / self.vad_window).astype(np.int)
s = np.floor(sample_offset / self.vad_window).astype(np.int32)
e = np.ceil((sample_offset + audio.shape[-1]) / self.vad_window).astype(np.int32)
new_offset = s * self.vad_window / self.sampling_rate
return self._default_probs[s:e], new_offset

Expand Down
4 changes: 2 additions & 2 deletions stable_whisper/stabilization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def timing2mask(
if time_offset:
silent_starts = (silent_starts - time_offset).clip(min=0)
silent_ends = (silent_ends - time_offset).clip(min=0)
mask_i = (silent_starts * units_per_second).round().astype(np.int)
mask_e = (silent_ends * units_per_second).round().astype(np.int)
mask_i = (silent_starts * units_per_second).round().astype(np.int32)
mask_e = (silent_ends * units_per_second).round().astype(np.int32)
for mi, me in zip(mask_i, mask_e):
ts_token_mask[mi:me+1] = True

Expand Down

0 comments on commit 3886bc6

Please sign in to comment.