Skip to content

Commit

Permalink
word timing tweaks (openai#1559)
Browse files Browse the repository at this point in the history
* word timing tweaks

* comment on eot

* clearer comments
  • Loading branch information
taylorchu authored and abyesilyurt committed Nov 13, 2023
1 parent 3f59ccf commit 02db96b
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions whisper/timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,13 @@ def find_alignment(
text_indices, time_indices = dtw(-matrix)

words, word_tokens = tokenizer.split_to_word_tokens(text_tokens + [tokenizer.eot])
if len(word_tokens) <= 1:
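# Return early when the split yields nothing but the eot token.
# In that case word_tokens[:-1] is empty, and padding the empty cumsum
# produces a float array instead of an integer one:
# >>> np.pad([], (1, 0))
# array([0.])
# Looking up jump_times with that float array then crashes with an error like
# IndexError: arrays used as indices must be of integer (or boolean) type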
return []
word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0))

jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
Expand Down Expand Up @@ -297,8 +304,6 @@ def add_word_timestamps(
# hack: truncate long words at sentence boundaries.
# a better segmentation algorithm based on VAD should be able to replace this.
if len(word_durations) > 0:
median_duration = np.median(word_durations)
max_duration = median_duration * 2
sentence_end_marks = ".。!!??"
# ensure words at sentence boundaries are not longer than twice the median word duration.
for i in range(1, len(alignment)):
Expand Down

0 comments on commit 02db96b

Please sign in to comment.