Skip to content

Commit

Permalink
feat: handle web alternates properly
Browse files Browse the repository at this point in the history
Before this change, multiple phrases were being
treated as alternates instead of as consecutive phrases,
and the actual alternates were being ignored.
  • Loading branch information
sowens-csd committed Oct 23, 2023
1 parent 010a207 commit a6c21b7
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 27 deletions.
32 changes: 20 additions & 12 deletions speech_to_text/example/pubspec.lock
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ packages:
dependency: transitive
description:
name: collection
sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c"
sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687
url: "https://pub.dev"
source: hosted
version: "1.17.1"
version: "1.17.2"
fake_async:
dependency: transitive
description:
Expand Down Expand Up @@ -100,18 +100,18 @@ packages:
dependency: transitive
description:
name: matcher
sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb"
sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e"
url: "https://pub.dev"
source: hosted
version: "0.12.15"
version: "0.12.16"
material_color_utilities:
dependency: transitive
description:
name: material_color_utilities
sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724
sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41"
url: "https://pub.dev"
source: hosted
version: "0.2.0"
version: "0.5.0"
meta:
dependency: transitive
description:
Expand Down Expand Up @@ -169,17 +169,17 @@ packages:
dependency: transitive
description:
name: source_span
sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250
sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
url: "https://pub.dev"
source: hosted
version: "1.9.1"
version: "1.10.0"
speech_to_text:
dependency: "direct main"
description:
path: ".."
relative: true
source: path
version: "6.2.0"
version: "6.3.0"
speech_to_text_macos:
dependency: transitive
description:
Expand Down Expand Up @@ -232,10 +232,10 @@ packages:
dependency: transitive
description:
name: test_api
sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb
sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8"
url: "https://pub.dev"
source: hosted
version: "0.5.1"
version: "0.6.0"
vector_math:
dependency: transitive
description:
Expand All @@ -244,6 +244,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.1.4"
web:
dependency: transitive
description:
name: web
sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10
url: "https://pub.dev"
source: hosted
version: "0.1.4-beta"
sdks:
dart: ">=3.0.0 <4.0.0"
dart: ">=3.1.0-185.0.dev <4.0.0"
flutter: ">=3.10.0"
55 changes: 55 additions & 0 deletions speech_to_text/lib/balanced_alternates.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import 'dart:math';

import 'package:speech_to_text/speech_recognition_result.dart';

/// Collects the alternates reported for each phrase of an utterance and
/// combines them into whole-utterance transcripts.
///
/// Each phrase may have a different number of alternates. When the
/// transcripts are built, a phrase with fewer alternates than the longest
/// phrase reuses its last available alternate so that every transcript
/// covers every phrase.
class BalancedAlternates {
  /// Alternates for each phrase, keyed by phrase index, in the order added.
  final Map<int, List<SpeechRecognitionWords>> _alternates = {};

  /// Appends an alternate with the given [words] and [confidence] to the
  /// phrase at index [phrase].
  ///
  /// Alternates of a phrase keep the order in which they are added, so the
  /// most likely alternate should be added first. Phrases may be added in
  /// any order; [getAlternates] concatenates them in ascending [phrase]
  /// order.
  void add(int phrase, String words, double confidence) {
    (_alternates[phrase] ??= []).add(SpeechRecognitionWords(words, confidence));
  }

  /// Returns one combined transcript per alternate position.
  ///
  /// The transcript at position `n` is the concatenation, in ascending
  /// phrase order, of the `n`-th alternate of every phrase. A phrase with
  /// fewer than `n + 1` alternates contributes its last alternate instead.
  /// The confidence of a transcript is the lowest confidence among the
  /// alternates it was built from.
  ///
  /// The number of transcripts equals the largest number of alternates
  /// recorded for any single phrase; the result is empty when nothing has
  /// been added. Calling this method does not modify the recorded
  /// alternates, so [add] may still be called afterwards.
  List<SpeechRecognitionWords> getAlternates() {
    // Order by phrase index rather than assuming the keys are exactly
    // 0..n-1, so a skipped index cannot cause a lookup failure.
    final orderedEntries = _alternates.entries.toList()
      ..sort((a, b) => a.key.compareTo(b.key));
    final phrases = [
      for (final entry in orderedEntries)
        if (entry.value.isNotEmpty) entry.value,
    ];

    final maxAlternates = phrases.fold<int>(
        0, (longest, alternates) => max(longest, alternates.length));

    return [
      for (var altIndex = 0; altIndex < maxAlternates; ++altIndex)
        _combine(phrases, altIndex),
    ];
  }

  /// Builds the transcript for alternate position [altIndex] across all
  /// [phrases], each of which must be non-empty.
  SpeechRecognitionWords _combine(
      List<List<SpeechRecognitionWords>> phrases, int altIndex) {
    final words = StringBuffer();
    var confidence = 1.0;
    for (final alternates in phrases) {
      // Fall back to the phrase's last alternate when it has no alternate
      // at this position; this pads short phrases without mutating them.
      final choice = alternates[min(altIndex, alternates.length - 1)];
      words.write(choice.recognizedWords);
      confidence = min(confidence, choice.confidence);
    }
    return SpeechRecognitionWords(words.toString(), confidence);
  }
}
26 changes: 22 additions & 4 deletions speech_to_text/lib/speech_to_text_web.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ import 'dart:async';
import 'dart:convert';
import 'dart:html' as html;
import 'dart:js_util' as js_util;
import 'dart:math';

import 'package:flutter_web_plugins/flutter_web_plugins.dart';
import 'package:speech_to_text/balanced_alternates.dart';
import 'package:speech_to_text/speech_recognition_error.dart';
import 'package:speech_to_text/speech_recognition_result.dart';
import 'package:speech_to_text_platform_interface/speech_to_text_platform_interface.dart';
Expand Down Expand Up @@ -199,21 +201,37 @@ class SpeechToTextPlugin extends SpeechToTextPlatform {
var recogResults = <SpeechRecognitionWords>[];
var results = event.results;
if (null == results) return;
final balanced = BalancedAlternates();
var resultIndex = 0;
var longestAlt = 0;
for (var recognitionResult in results) {
if (null == recognitionResult.length || recognitionResult.length == 0) {
continue;
}
for (var altIndex = 0; altIndex < recognitionResult.length!; ++altIndex) {

for (var altIndex = 0;
altIndex < (recognitionResult.length ?? 0);
++altIndex) {
longestAlt = max(longestAlt, altIndex);
var alt = js_util.callMethod(recognitionResult, 'item', [altIndex]);
if (null == alt) continue;
String? transcript = js_util.getProperty(alt, 'transcript');
num? confidence = js_util.getProperty(alt, 'confidence');
if (null != transcript && null != confidence) {
recogResults
.add(SpeechRecognitionWords(transcript, confidence.toDouble()));
if (null != transcript) {
balanced.add(resultIndex, transcript, confidence?.toDouble() ?? 1.0);
// final fullTranscript =
// recogResults[altIndex].recognizedWords + transcript;
// final fullConfidence = min(
// recogResults[altIndex].confidence, confidence?.toDouble() ?? 1.0);
// recogResults[altIndex] =
// SpeechRecognitionWords(fullTranscript, fullConfidence.toDouble());
// recogResults
// .add(SpeechRecognitionWords(transcript, confidence.toDouble()));
}
}
++resultIndex;
}
recogResults = balanced.getAlternates();
var result = SpeechRecognitionResult(recogResults, isFinal);
onTextRecognition?.call(jsonEncode(result.toJson()));
_resultSent = true;
Expand Down
30 changes: 19 additions & 11 deletions speech_to_text/pubspec.lock
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,10 @@ packages:
dependency: transitive
description:
name: collection
sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c"
sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687
url: "https://pub.dev"
source: hosted
version: "1.17.1"
version: "1.17.2"
convert:
dependency: transitive
description:
Expand Down Expand Up @@ -316,18 +316,18 @@ packages:
dependency: transitive
description:
name: matcher
sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb"
sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e"
url: "https://pub.dev"
source: hosted
version: "0.12.15"
version: "0.12.16"
material_color_utilities:
dependency: transitive
description:
name: material_color_utilities
sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724
sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41"
url: "https://pub.dev"
source: hosted
version: "0.2.0"
version: "0.5.0"
meta:
dependency: "direct main"
description:
Expand Down Expand Up @@ -449,10 +449,10 @@ packages:
dependency: transitive
description:
name: source_span
sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250
sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
url: "https://pub.dev"
source: hosted
version: "1.9.1"
version: "1.10.0"
speech_to_text_macos:
dependency: "direct main"
description:
Expand Down Expand Up @@ -513,10 +513,10 @@ packages:
dependency: transitive
description:
name: test_api
sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb
sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8"
url: "https://pub.dev"
source: hosted
version: "0.5.1"
version: "0.6.0"
timing:
dependency: transitive
description:
Expand Down Expand Up @@ -549,6 +549,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.0.0"
web:
dependency: transitive
description:
name: web
sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10
url: "https://pub.dev"
source: hosted
version: "0.1.4-beta"
web_socket_channel:
dependency: transitive
description:
Expand All @@ -566,5 +574,5 @@ packages:
source: hosted
version: "3.1.0"
sdks:
dart: ">=3.0.0 <4.0.0"
dart: ">=3.1.0-185.0.dev <4.0.0"
flutter: ">=3.10.0"
83 changes: 83 additions & 0 deletions speech_to_text/test/balanced_alternates_test.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import 'dart:math';

import 'package:flutter_test/flutter_test.dart';
import 'package:speech_to_text/balanced_alternates.dart';

/// Unit tests for [BalancedAlternates].
///
/// Every test starts from a fresh, empty instance created in `setUp`.
void main() {
  late BalancedAlternates balancedAlternates;

  setUp(() {
    balancedAlternates = BalancedAlternates();
  });

  test('empty results with no alternates', () {
    expect(balancedAlternates.getAlternates(), isEmpty);
  });

  test('one phrase, no alternates returns that phrase', () {
    balancedAlternates.add(0, 'one', 0.85);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(1));
    expect(alts[0].recognizedWords, 'one');
    expect(alts[0].confidence, 0.85);
  });

  test('one phrase, one alternate returns that phrase and alternate', () {
    balancedAlternates.add(0, 'one', 0.85);
    balancedAlternates.add(0, 'an', 0.65);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one');
    expect(alts[0].confidence, 0.85);
    expect(alts[1].recognizedWords, 'an');
    expect(alts[1].confidence, 0.65);
  });

  test('one phrase, two alternates returns that phrase and alternates', () {
    balancedAlternates.add(0, 'one', 0.85);
    balancedAlternates.add(0, 'an', 0.65);
    balancedAlternates.add(0, 'and', 0.55);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(3));
    expect(alts[0].recognizedWords, 'one');
    expect(alts[0].confidence, 0.85);
    expect(alts[1].recognizedWords, 'an');
    expect(alts[1].confidence, 0.65);
    expect(alts[2].recognizedWords, 'and');
    expect(alts[2].confidence, 0.55);
  });

  test('two phrases, no alternates returns concatenated phrase', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(1, 'tree', 0.95);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(1));
    expect(alts[0].recognizedWords, 'one tree');
    // The combined confidence is the lowest of the contributing phrases.
    expect(alts[0].confidence, 0.85);
  });

  test('two phrases, one alternate each returns expected', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(0, 'an ', 0.65);
    balancedAlternates.add(1, 'tree', 0.95);
    balancedAlternates.add(1, 'free', 0.35);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one tree');
    expect(alts[0].confidence, 0.85);
    expect(alts[1].recognizedWords, 'an free');
    expect(alts[1].confidence, 0.35);
  });

  test('two phrases, missing alternate for second', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(0, 'an ', 0.65);
    balancedAlternates.add(1, 'tree', 0.95);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one tree');
    expect(alts[0].confidence, 0.85);
    // The second phrase has no second alternate, so 'tree' is reused.
    expect(alts[1].recognizedWords, 'an tree');
    expect(alts[1].confidence, 0.65);
  });

  test('two phrases, missing two alternates for second', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(0, 'an ', 0.65);
    balancedAlternates.add(0, 'and ', 0.55);
    balancedAlternates.add(1, 'tree', 0.95);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(3));
    expect(alts[0].recognizedWords, 'one tree');
    expect(alts[0].confidence, 0.85);
    expect(alts[1].recognizedWords, 'an tree');
    expect(alts[1].confidence, 0.65);
    // The only alternate of the second phrase is reused more than once.
    expect(alts[2].recognizedWords, 'and tree');
    expect(alts[2].confidence, 0.55);
  });

  test('two phrases, missing alternate for first', () {
    balancedAlternates.add(0, 'one ', 0.85);
    balancedAlternates.add(1, 'tree', 0.95);
    balancedAlternates.add(1, 'free', 0.35);
    final alts = balancedAlternates.getAlternates();
    expect(alts, hasLength(2));
    expect(alts[0].recognizedWords, 'one tree');
    expect(alts[0].confidence, 0.85);
    // The first phrase has no second alternate, so 'one ' is reused.
    expect(alts[1].recognizedWords, 'one free');
    expect(alts[1].confidence, 0.35);
  });
}

0 comments on commit a6c21b7

Please sign in to comment.