-
Notifications
You must be signed in to change notification settings - Fork 238
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: handle web alternates properly
Before this change, multiple phrases were treated as alternates rather than as separate phrases, and the real alternates were ignored.
- Loading branch information
sowens-csd
committed
Oct 23, 2023
1 parent
010a207
commit a6c21b7
Showing
5 changed files
with
199 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import 'dart:math'; | ||
|
||
import 'package:speech_to_text/speech_recognition_result.dart'; | ||
|
||
/// Collects per-phrase recognition alternates and combines them into
/// whole-utterance alternates.
///
/// Each phrase is identified by an integer index and owns an ordered list of
/// alternates. Callers are expected to add the alternates for a phrase in
/// order of likelihood, most likely first; the order of [add] calls is the
/// only ranking this class knows about.
class BalancedAlternates {
  /// Alternates recorded so far, keyed by phrase index.
  ///
  /// Lists are only ever created by [add], which immediately appends to them,
  /// so every list stored here holds at least one entry.
  final Map<int, List<SpeechRecognitionWords>> _alternates = {};

  /// Appends an alternate to the phrase at index [phrase].
  ///
  /// The first alternate added for a phrase becomes its alternate 0, the
  /// second its alternate 1, and so on. [words] is the recognized text and
  /// [confidence] the confidence reported for it.
  void add(int phrase, String words, double confidence) {
    (_alternates[phrase] ??= []).add(SpeechRecognitionWords(words, confidence));
  }

  /// Returns one combined result per alternate rank.
  ///
  /// Result `n` is the concatenation, in ascending phrase-index order, of the
  /// `n`th alternate of every phrase. A phrase that has fewer alternates than
  /// the longest phrase contributes its last alternate for the ranks it is
  /// missing, so every result covers all phrases. The confidence of a result
  /// is the lowest confidence among the alternates it was built from, capped
  /// at 1.0.
  ///
  /// Returns an empty list when nothing has been added. This method does not
  /// modify the recorded alternates, so it may be called repeatedly and
  /// interleaved with [add].
  List<SpeechRecognitionWords> getAlternates() {
    // Walk the phrases by ascending index rather than assuming the indexes
    // are exactly 0..length-1, so a gap in the indexes cannot cause a lookup
    // of a phrase that was never added.
    final entries = _alternates.entries.toList()
      ..sort((a, b) => a.key.compareTo(b.key));
    final phrases = [for (final entry in entries) entry.value];

    final maxAlternates = phrases.fold<int>(
        0, (most, alternates) => max(most, alternates.length));

    final result = <SpeechRecognitionWords>[];
    for (var altIndex = 0; altIndex < maxAlternates; ++altIndex) {
      final words = StringBuffer();
      var confidence = 1.0;
      for (final alternates in phrases) {
        // Pad on the fly instead of appending to the stored list: a short
        // phrase reuses its last alternate for the missing ranks.
        final alternate = altIndex < alternates.length
            ? alternates[altIndex]
            : alternates.last;
        words.write(alternate.recognizedWords);
        confidence = min(confidence, alternate.confidence);
      }
      result.add(SpeechRecognitionWords(words.toString(), confidence));
    }
    return result;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import 'dart:math'; | ||
|
||
import 'package:flutter_test/flutter_test.dart'; | ||
import 'package:speech_to_text/balanced_alternates.dart'; | ||
|
||
/// Unit tests for [BalancedAlternates].
///
/// Phrase text in the multi-phrase cases carries its own trailing space
/// ('one ') because the results are compared against a plain concatenation
/// of the phrase texts.
void main() {
  late BalancedAlternates balancedAlternates;

  // A fresh instance per test keeps phrases added by one test from leaking
  // into the next.
  setUp(() {
    balancedAlternates = BalancedAlternates();
  });

  test('empty results with no alternates', () {
    expect(balancedAlternates.getAlternates(), isEmpty);
  });

  test('one phrase, no alternates returns that phrase', () {
    balancedAlternates.add(0, 'one', 0.85);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(1));
    expect(alts[0].recognizedWords, 'one');
    expect(alts[0].confidence, 0.85);
  });

  test('one phrase, one alternate returns that phrase and alternate', () {
    balancedAlternates.add(0, 'one', 0.85);
    balancedAlternates.add(0, 'an', 0.65);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one');
    expect(alts[0].confidence, 0.85);
    expect(alts[1].recognizedWords, 'an');
    expect(alts[1].confidence, 0.65);
  });

  test('one phrase, two alternates returns that phrase and alternates', () {
    balancedAlternates.add(0, 'one', 0.85);
    balancedAlternates.add(0, 'an', 0.65);
    balancedAlternates.add(0, 'and', 0.55);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(3));
    expect(alts[0].recognizedWords, 'one');
    expect(alts[0].confidence, 0.85);
    expect(alts[1].recognizedWords, 'an');
    expect(alts[1].confidence, 0.65);
    expect(alts[2].recognizedWords, 'and');
    expect(alts[2].confidence, 0.55);
  });

  test('two phrases, no alternates returns concatenated phrase', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(1, 'tree', 0.95);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(1));
    expect(alts[0].recognizedWords, 'one tree');
    // The combined confidence is the lowest of the contributing phrases.
    expect(alts[0].confidence, 0.85);
  });

  test('two phrases, one alternate each returns expected', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(0, 'an ', 0.65);
    balancedAlternates.add(1, 'tree', 0.95);
    balancedAlternates.add(1, 'free', 0.35);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one tree');
    expect(alts[0].confidence, 0.85);
    expect(alts[1].recognizedWords, 'an free');
    expect(alts[1].confidence, 0.35);
  });

  test('two phrases, missing alternate for second', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(0, 'an ', 0.65);
    balancedAlternates.add(1, 'tree', 0.95);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one tree');
    expect(alts[0].confidence, 0.85);
    // The second phrase has no second alternate, so 'tree' is reused.
    expect(alts[1].recognizedWords, 'an tree');
    expect(alts[1].confidence, 0.65);
  });

  test('two phrases, missing alternate for first', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(1, 'tree', 0.95);
    balancedAlternates.add(1, 'free', 0.35);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one tree');
    expect(alts[0].confidence, 0.85);
    // The first phrase has no second alternate, so 'one ' is reused.
    expect(alts[1].recognizedWords, 'one free');
    expect(alts[1].confidence, 0.35);
  });
}