I'm using the Microsoft.CognitiveServices.Speech speech-to-text service, passing the input as a MemoryStream (received from a custom API) instead of a file input. However, I get the error "NOMATCH: Speech could not be recognized". The code works when I read the file and pass the input as a FileStream. Here is the code I'm using:
public static async Task<string> RecognizeSpeechFromStreamAsync(Stream audioStream)
{
try
{
byte channels = 1;
byte bitsPerSample = 16;
uint samplesPerSecond = 16000; // or 8000
var audioFormat = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);
var contosoStream = new ContosoAudioStream(audioStream);
var audioConfig = AudioConfig.FromStreamInput(contosoStream, audioFormat);
var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
speechConfig.SpeechRecognitionLanguage = "en-US";
using (var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig))
{
Console.WriteLine("Starting speech recognition from stream...");
var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();
if (speechRecognitionResult.Reason == ResultReason.RecognizedSpeech)
{
Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
return speechRecognitionResult.Text;
}
else if (speechRecognitionResult.Reason == ResultReason.NoMatch)
{
Console.WriteLine($"NOMATCH: Speech could not be recognized.");
return null; // Or an appropriate error message
}
else if (speechRecognitionResult.Reason == ResultReason.Canceled)
{
var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
if (cancellation.Reason == CancellationReason.Error)
{
Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
// Consider throwing an exception here to propagate the error
}
return null; // Or an appropriate error message
}
else
{
Console.WriteLine($"Unexpected result reason: {speechRecognitionResult.Reason}");
return null; // Or an appropriate error message
}
}
}
catch (Exception ex)
{
Console.Error.WriteLine($"Exception during speech recognition: {ex.Message}");
return null; // Or throw the exception, depending on your error handling strategy
}
}
public class ContosoAudioStream : PullAudioInputStreamCallback
{
private BinaryReader _reader;
private int _chunkSize;
public ContosoAudioStream(Stream audioStream, int chunkSize = 1024)
{
_reader = new BinaryReader(audioStream);
_chunkSize = chunkSize;
}
public override int Read(byte[] buffer, uint size)
{
try
{
byte[] tempBuffer = _reader.ReadBytes((int)Math.Min(size, _chunkSize));
tempBuffer.CopyTo(buffer, 0);
return tempBuffer.Length;
}
catch (EndOfStreamException)
{
return 0; // Signal the end of the stream
}
catch (Exception ex)
{
Console.Error.WriteLine($"Error reading from stream: {ex.Message}");
return 0;
}
}
public override void Close()
{
_reader?.Close();
Console.WriteLine("ContosoAudioStream closed.");
}
}
1 Answer
error "NOMATCH: Speech could not be recognized"
I got the same error when I tried a WAV file with a sample rate of 48,000 Hz. The format you pass to AudioStreamFormat.GetWaveFormatPCM must describe the bytes actually in the stream: if you declare 16 kHz but feed 48 kHz audio, the recognizer reads the samples at a third of their real rate, the speech becomes unintelligible to the model, and it returns NoMatch.
Use the command below to check the sample rate of your WAV file.
ffmpeg -i <path/to/.wav file>
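If you'd rather verify the format in code before calling the recognizer, here is a minimal sketch that reads the sample rate, channel count, and bit depth from a canonical RIFF/WAVE header. It assumes the "fmt " chunk sits at the standard offset (true for most PCM files; a robust parser would walk the chunks), and the WavProbe name is just for illustration:
using System;
using System.IO;
using System.Text;
public static class WavProbe
{
    // Reads the format fields from a canonical 44-byte RIFF/WAVE header.
    // Assumes "fmt " is the first chunk (offset 12); real-world files can
    // carry extra chunks, so treat this as a sanity check, not a full parser.
    public static (int SampleRate, int Channels, int BitsPerSample) ReadFormat(Stream wav)
    {
        var header = new byte[36];
        if (wav.Read(header, 0, header.Length) < header.Length ||
            Encoding.ASCII.GetString(header, 0, 4) != "RIFF" ||
            Encoding.ASCII.GetString(header, 8, 4) != "WAVE")
        {
            throw new InvalidDataException("Not a RIFF/WAVE stream.");
        }
        int channels = BitConverter.ToInt16(header, 22);       // offset 22: channel count
        int sampleRate = BitConverter.ToInt32(header, 24);     // offset 24: sample rate
        int bitsPerSample = BitConverter.ToInt16(header, 34);  // offset 34: bit depth
        wav.Position = 0; // rewind so the recognizer still sees the whole stream
        return (sampleRate, channels, bitsPerSample);
    }
}
Calling WavProbe.ReadFormat(memoryStream) before RecognizeSpeechFromStreamAsync lets you fail fast, or pick matching GetWaveFormatPCM arguments, instead of getting NOMATCH.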
So, to resolve the issue, I converted my WAV file to 16,000 Hz (mono, 16-bit) using the command below and successfully got the speech-to-text output.
ffmpeg -i "<path/to/.wav file>" -ar 16000 -ac 1 -sample_fmt s16 "<path/to/converted.wav file>"
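If shelling out to ffmpeg isn't an option, the same conversion can be done in-process. Below is a sketch using the third-party NAudio package, which is an assumption about your environment rather than part of the Speech SDK (MediaFoundationResampler is Windows-only):
using NAudio.Wave;
public static class Resample
{
    // Converts any input NAudio can open to 16 kHz, 16-bit, mono PCM,
    // matching the format declared via GetWaveFormatPCM(16000, 16, 1).
    public static void To16kMonoPcm(string inPath, string outPath)
    {
        using var reader = new AudioFileReader(inPath);
        var target = new WaveFormat(16000, 16, 1);
        using var resampler = new MediaFoundationResampler(reader, target);
        WaveFileWriter.CreateWaveFile(outPath, resampler);
    }
}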
Code:
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
class Program
{
private static string speechKey = "<SpeechKey>";
private static string speechRegion = "<SpeechRegion>";
static async Task Main(string[] args)
{
string filePath = "<path/to/.wav file>";
try
{
if (!File.Exists(filePath))
{
Console.WriteLine("Error: Audio file not found.");
return;
}
byte[] audioData = File.ReadAllBytes(filePath);
using (var memoryStream = new MemoryStream(audioData))
{
string resultText = await RecognizeSpeechFromStreamAsync(memoryStream);
Console.WriteLine($"Recognition Result: {resultText}");
}
}
catch (Exception ex)
{
Console.WriteLine($"Exception: {ex.Message}");
}
}
public static async Task<string> RecognizeSpeechFromStreamAsync(Stream audioStream)
{
try
{
byte channels = 1;
byte bitsPerSample = 16;
uint samplesPerSecond = 16000;
var audioFormat = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);
var contosoStream = new ContosoAudioStream(audioStream);
var audioConfig = AudioConfig.FromStreamInput(contosoStream, audioFormat);
var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
speechConfig.SpeechRecognitionLanguage = "en-US";
using (var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig))
{
Console.WriteLine("Starting speech recognition from stream...");
var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();
if (speechRecognitionResult.Reason == ResultReason.RecognizedSpeech)
{
Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
return speechRecognitionResult.Text;
}
else if (speechRecognitionResult.Reason == ResultReason.NoMatch)
{
Console.WriteLine($"NOMATCH: Speech could not be recognized.");
return null;
}
else if (speechRecognitionResult.Reason == ResultReason.Canceled)
{
var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");
if (cancellation.Reason == CancellationReason.Error)
{
Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
}
return null;
}
else
{
Console.WriteLine($"Unexpected result reason: {speechRecognitionResult.Reason}");
return null;
}
}
}
catch (Exception ex)
{
Console.Error.WriteLine($"Exception during speech recognition: {ex.Message}");
return null;
}
}
}
public class ContosoAudioStream : PullAudioInputStreamCallback
{
private BinaryReader _reader;
private int _chunkSize;
public ContosoAudioStream(Stream audioStream, int chunkSize = 1024)
{
_reader = new BinaryReader(audioStream);
_chunkSize = chunkSize;
}
public override int Read(byte[] buffer, uint size)
{
try
{
byte[] tempBuffer = _reader.ReadBytes((int)Math.Min(size, _chunkSize));
tempBuffer.CopyTo(buffer, 0);
return tempBuffer.Length;
}
catch (EndOfStreamException)
{
return 0;
}
catch (Exception ex)
{
Console.Error.WriteLine($"Error reading from stream: {ex.Message}");
return 0;
}
}
public override void Close()
{
_reader?.Close();
Console.WriteLine("ContosoAudioStream closed.");
}
}
Output: the recognized text is printed to the console.