Skip to content

Commit

Permalink
Constrain the speaker counting only with max_speakers
Browse files Browse the repository at this point in the history
  • Loading branch information
flyingleafe committed Aug 8, 2023
1 parent 9eebf4b commit d49f5fe
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 7 deletions.
5 changes: 2 additions & 3 deletions pyannote/audio/pipelines/speaker_diarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,6 @@ def apply(
# estimate frame-level number of instantaneous speakers
count = self.speaker_count(
binarized_segmentations,
-max_speakers,
frames=self._frames,
warm_up=(0.0, 0.0),
)
Expand Down Expand Up @@ -547,8 +546,8 @@ def apply(

# during counting, we could possibly overcount the number of instantaneous
# speakers due to segmentation errors, so we cap the maximum instantaneous number
-# of speakers by the number of detected clusters
-count.data = np.minimum(count.data, num_different_speakers)
+# of speakers by the `max_speakers` value
+count.data = np.minimum(count.data, max_speakers)

# reconstruct discrete diarization from raw hard clusters

Expand Down
4 changes: 0 additions & 4 deletions pyannote/audio/pipelines/utils/diarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ def optimal_mapping(
@staticmethod
def speaker_count(
binarized_segmentations: SlidingWindowFeature,
-max_speakers: Union[int, float] = np.inf,
warm_up: Tuple[float, float] = (0.1, 0.1),
frames: SlidingWindow = None,
) -> SlidingWindowFeature:
Expand All @@ -131,8 +130,6 @@ def speaker_count(
----------
binarized_segmentations : SlidingWindowFeature
(num_chunks, num_frames, num_classes)-shaped binarized scores.
-max_speakers : int or np.inf
-Maximum number of speakers allowed. Counts will not exceed this number
warm_up : (float, float) tuple, optional
Left/right warm up ratio of chunk duration.
Defaults to (0.1, 0.1), i.e. 10% on both sides.
Expand All @@ -155,7 +152,6 @@ def speaker_count(
missing=0.0,
skip_average=False,
)
-count.data[count.data > max_speakers] = max_speakers
count.data = np.rint(count.data).astype(np.uint8)

return count
Expand Down

0 comments on commit d49f5fe

Please sign in to comment.