Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Robustify speech recognition test #76652

Merged
merged 4 commits into from
Oct 6, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 112 additions & 19 deletions src/libraries/System.Speech/tests/SynthesizeRecognizeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
using System.Threading;
using System.Xml;
using Xunit;
using Xunit.Abstractions;

namespace SampleSynthesisTests
{
Expand All @@ -26,33 +27,125 @@ public class SynthesizeRecognizeTests : FileCleanupTestBase
PlatformDetection.IsNotWindowsNanoNorServerCore &&
SpeechRecognitionEngine.InstalledRecognizers().Count > 0;

private ITestOutputHelper _output;

/// <summary>Stores the supplied test output helper in <c>_output</c>.</summary>
/// <param name="output">Output helper assigned to <c>_output</c>.</param>
public SynthesizeRecognizeTests(ITestOutputHelper output) => _output = output;

[ConditionalFact(nameof(HasInstalledRecognizers))]
public void SpeechSynthesizerToSpeechRecognitionEngine1()
{
    // Word chosen to be recognized with high confidence.
    // No culture guard is needed here: SpeechSynthesizerToSpeechRecognitionEngine_Core
    // already returns early unless the current culture is "en-US".
    SpeechSynthesizerToSpeechRecognitionEngine_Core("recognize", "recognize");
}

using var ms = new MemoryStream();
[ConditionalFact(nameof(HasInstalledRecognizers))]
public void SpeechSynthesizerToSpeechRecognitionEngine2() =>
    // "apple" is a word picked because it is recognized with high confidence.
    SpeechSynthesizerToSpeechRecognitionEngine_Core(input: "apple", output: "apple");

using (var synth = new SpeechSynthesizer())
{
synth.SetOutputToWaveStream(ms);
var prompt = new Prompt("synthesizer");
synth.Speak(prompt);
}
[ConditionalFact(nameof(HasInstalledRecognizers))]
public void SpeechSynthesizerToSpeechRecognitionEngine_SilenceFails() =>
    // A single space is synthesized; a null expected output tells the core helper
    // that no recognition result should come back.
    SpeechSynthesizerToSpeechRecognitionEngine_Core(input: " ", output: null);

ms.Position = 0;
/// <summary>
/// Synthesizes <paramref name="input"/> into an in-memory wave stream, feeds that stream to a
/// <see cref="SpeechRecognitionEngine"/> loaded with a <see cref="DictationGrammar"/>, and
/// asserts on the recognition result. The whole round trip runs under
/// <c>RetryHelper.Execute</c>, and collected diagnostics are written to the test output
/// whenever an attempt throws.
/// </summary>
/// <param name="input">Text passed to the speech synthesizer.</param>
/// <param name="output">
/// Text that the recognized string must contain (ordinal, case-insensitive), or
/// <see langword="null"/> when recognition is expected to return no result.
/// </param>
private void SpeechSynthesizerToSpeechRecognitionEngine_Core(string input, string output)
{
    if (PlatformDetection.IsWindows7 && PlatformDetection.IsX86Process)
    {
        return; // Flaky on this configuration
    }

    RetryHelper.Execute(() => // Flaky in some cases
    {
        // Only exercised when the current culture is en-US.
        if (Thread.CurrentThread.CurrentCulture.ToString() != "en-US")
        {
            return;
        }

        using var ms = new MemoryStream();

        using (var synth = new SpeechSynthesizer())
        {
            synth.SetOutputToWaveStream(ms);
            synth.Speak(new Prompt(input));
        }

        // Rewind so the recognizer reads the synthesized audio from the beginning.
        ms.Position = 0;

        using var rec = new SpeechRecognitionEngine();

        Stopwatch sw = new();
        rec.LoadGrammar(new DictationGrammar());
        rec.SetInputToWaveStream(ms);
        rec.InitialSilenceTimeout = TimeSpan.FromSeconds(60); // for slow machines
        rec.BabbleTimeout = TimeSpan.FromSeconds(60); // for slow machines/robustness

        StringBuilder diagnostics = new();
        diagnostics.AppendLine($"Passing synthesized input '{input}'");

        // Records every alternate phrase of a recognition result in the diagnostics log.
        void AppendAlternates(RecognitionResult recognition)
        {
            foreach (RecognizedPhrase phrase in recognition.Alternates)
            {
                diagnostics.AppendLine($"Alternatives included '{phrase.Text}' with confidence {phrase.Confidence}");
            }
        }

        try
        {
            rec.SpeechDetected += (o, args) =>
            {
                diagnostics.AppendLine($"Speech detected at position {args.AudioPosition}");
            };

            rec.SpeechRecognitionRejected += (o, args) =>
            {
                // A rejection is only a failure when a successful recognition was expected.
                if (output != null)
                {
                    AppendAlternates(args.Result);
                    diagnostics.AppendLine($"Elapsed {sw.Elapsed}");
                    Assert.Fail($"Recognition of '{input}' was expected to produce a string containing '{output}', but failed");
                }
            };

            RecognitionResult argsResult = null;
            rec.SpeechRecognized += (o, args) =>
            {
                argsResult = args.Result;
                diagnostics.AppendLine($"Received speech recognized event with result '{args.Result.Text}'");
            };

            sw.Start();
            RecognitionResult result = rec.Recognize();
            sw.Stop();

            // The result delivered through the SpeechRecognized event must match the one
            // returned by Recognize() (both stay null when nothing was recognized).
            Assert.Equal(argsResult, result);

            if (output == null)
            {
                Assert.Null(result);
            }
            else
            {
                Assert.NotNull(result);
                diagnostics.AppendLine($"Recognized '{result.Text}' with confidence {result.Confidence}");
                diagnostics.AppendLine($"Elapsed {sw.Elapsed}");
                AppendAlternates(result);

                // strings we use are normally > 0.8
                Assert.True(result.Confidence > 0.1, $"Confidence {result.Confidence} for '{result.Text}' was not greater than 0.1");

                // Use Contains as sometimes we get garbage on the end, eg., "recognize" can be "recognized" or "a recognize"
                Assert.Contains(output, result.Text, StringComparison.OrdinalIgnoreCase);
            }
        }
        catch
        {
            // Surface everything gathered so far before letting the failure propagate.
            _output.WriteLine(diagnostics.ToString());
            throw;
        }
    });
}

[ConditionalFact(nameof(HasInstalledRecognizers))]
Expand Down