Constrain the speaker counting only with max_speakers

pyannote · Aug 8, 2023 · d49f5fe · d49f5fe
1 parent 9eebf4b
commit d49f5fe
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 7 deletions.
diff --git a/pyannote/audio/pipelines/speaker_diarization.py b/pyannote/audio/pipelines/speaker_diarization.py
@@ -490,7 +490,6 @@ def apply(
         # estimate frame-level number of instantaneous speakers
         count = self.speaker_count(
             binarized_segmentations,
-            max_speakers,
             frames=self._frames,
             warm_up=(0.0, 0.0),
         )
@@ -547,8 +546,8 @@ def apply(
 
         # during counting, we could possibly overcount the number of instantaneous
         # speakers due to segmentation errors, so we cap the maximum instantaneous number
-        # of speakers by the number of detected clusters
-        count.data = np.minimum(count.data, num_different_speakers)
+        # of speakers by the `max_speakers` value
+        count.data = np.minimum(count.data, max_speakers)
 
         # reconstruct discrete diarization from raw hard clusters
 

diff --git a/pyannote/audio/pipelines/utils/diarization.py b/pyannote/audio/pipelines/utils/diarization.py
@@ -121,7 +121,6 @@ def optimal_mapping(
     @staticmethod
     def speaker_count(
         binarized_segmentations: SlidingWindowFeature,
-        max_speakers: Union[int, float] = np.inf,
         warm_up: Tuple[float, float] = (0.1, 0.1),
         frames: SlidingWindow = None,
     ) -> SlidingWindowFeature:
@@ -131,8 +130,6 @@ def speaker_count(
         ----------
         binarized_segmentations : SlidingWindowFeature
             (num_chunks, num_frames, num_classes)-shaped binarized scores.
-        max_speakers : int or np.inf
-            Maximum number of speakers allowed. Counts will not exceed this number
         warm_up : (float, float) tuple, optional
             Left/right warm up ratio of chunk duration.
             Defaults to (0.1, 0.1), i.e. 10% on both sides.
@@ -155,7 +152,6 @@ def speaker_count(
             missing=0.0,
             skip_average=False,
         )
-        count.data[count.data > max_speakers] = max_speakers
         count.data = np.rint(count.data).astype(np.uint8)
 
         return count