Using Azure Speech To Text Service where I'm giving input as memory stream but getting error "NOMATCH: Speech could not be recognized"

I'm using the Microsoft.CognitiveServices.Speech speech-to-text service where I'm giving input as a MemoryStream instead of file input, using a custom API. However, I get the error "NOMATCH: Speech could not be recognized". The code works when I'm using file input, where I read the file and give the input as a FileStream. Here is the code I'm using:

    /// <summary>
    /// Performs one-shot speech recognition over a raw PCM audio stream using the
    /// Azure Speech SDK. The stream is declared to the SDK as 16 kHz, 16-bit, mono
    /// PCM; audio that does not actually match this format typically produces a
    /// NOMATCH result.
    /// </summary>
    /// <param name="audioStream">Raw PCM audio bytes (headerless or WAV; must match the declared format).</param>
    /// <returns>The recognized text, or null when nothing was recognized or an error occurred.</returns>
    public static async Task<string> RecognizeSpeechFromStreamAsync(Stream audioStream)
    {
        try
        {
            byte channels = 1;
            byte bitsPerSample = 16;
            uint samplesPerSecond = 16000; // or 8000 — must match the actual audio's sample rate
            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);

            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
            speechConfig.SpeechRecognitionLanguage = "en-US";

            // Both the pull-stream callback and the AudioConfig hold native SDK
            // resources; dispose them deterministically instead of leaking them.
            using (var contosoStream = new ContosoAudioStream(audioStream))
            using (var audioConfig = AudioConfig.FromStreamInput(contosoStream, audioFormat))
            using (var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig))
            {
                Console.WriteLine("Starting speech recognition from stream...");
                var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

                if (speechRecognitionResult.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
                    return speechRecognitionResult.Text;
                }
                else if (speechRecognitionResult.Reason == ResultReason.NoMatch)
                {
                    // Audio was received but no speech was matched (often a format mismatch).
                    Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                    return null; // Or an appropriate error message
                }
                else if (speechRecognitionResult.Reason == ResultReason.Canceled)
                {
                    var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
                    Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                        Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                        // Consider throwing an exception here to propagate the error
                    }
                    return null; // Or an appropriate error message
                }
                else
                {
                    Console.WriteLine($"Unexpected result reason: {speechRecognitionResult.Reason}");
                    return null; // Or an appropriate error message
                }
            }
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Exception during speech recognition: {ex.Message}");
            return null; // Or throw the exception, depending on your error handling strategy
        }
    }
}

/// <summary>
/// Pull-mode audio callback that feeds the Azure Speech SDK from an arbitrary
/// <see cref="Stream"/>, handing over at most a fixed number of bytes per read.
/// </summary>
public class ContosoAudioStream : PullAudioInputStreamCallback
{
    private readonly BinaryReader _audioReader;  // wraps the caller-supplied stream
    private readonly int _maxChunkBytes;         // upper bound on bytes returned per Read call

    public ContosoAudioStream(Stream audioStream, int chunkSize = 1024)
    {
        _audioReader = new BinaryReader(audioStream);
        _maxChunkBytes = chunkSize;
    }

    /// <summary>
    /// Copies up to min(size, chunk size) bytes into <paramref name="buffer"/> and
    /// returns the count delivered. Returning 0 signals end-of-stream to the SDK.
    /// </summary>
    public override int Read(byte[] buffer, uint size)
    {
        try
        {
            var request = (int)Math.Min(size, _maxChunkBytes);
            var chunk = _audioReader.ReadBytes(request);
            chunk.CopyTo(buffer, 0);
            return chunk.Length;
        }
        catch (EndOfStreamException)
        {
            // Out of audio: report 0 so the recognizer stops pulling.
            return 0;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Error reading from stream: {ex.Message}");
            return 0;
        }
    }

    /// <summary>Closes the reader (and with it the wrapped stream).</summary>
    public override void Close()
    {
        _audioReader?.Close();
        Console.WriteLine("ContosoAudioStream closed.");
    }
}

I'm using Microsoft.CognitiveServices.Speech speech to text service where I'm giving input as MemoryStream instead of file input using a custom api. However I get the error "NOMATCH: Speech could not be recognized". The code works when I'm using a file input where I read the file and give the input as FileStream. Here is the code I'm using:

    /// <summary>
    /// Runs one-shot Azure speech recognition over a raw PCM stream (declared to
    /// the SDK as 16 kHz, 16-bit, mono) and returns the transcript.
    /// </summary>
    /// <param name="audioStream">PCM audio matching the declared wave format.</param>
    /// <returns>The recognized text, or null if recognition did not succeed.</returns>
    public static async Task<string> RecognizeSpeechFromStreamAsync(Stream audioStream)
    {
        try
        {
            // Declared input format: 16000 Hz (or 8000), 16-bit samples, 1 channel.
            var waveFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);

            var pullStream = new ContosoAudioStream(audioStream);
            var audioInput = AudioConfig.FromStreamInput(pullStream, waveFormat);

            var config = SpeechConfig.FromSubscription(speechKey, speechRegion);
            config.SpeechRecognitionLanguage = "en-US";

            using (var recognizer = new SpeechRecognizer(config, audioInput))
            {
                Console.WriteLine("Starting speech recognition from stream...");
                var result = await recognizer.RecognizeOnceAsync();

                switch (result.Reason)
                {
                    case ResultReason.RecognizedSpeech:
                        Console.WriteLine($"RECOGNIZED: Text={result.Text}");
                        return result.Text;

                    case ResultReason.NoMatch:
                        // Audio arrived but no speech was matched.
                        Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                        return null;

                    case ResultReason.Canceled:
                        var details = CancellationDetails.FromResult(result);
                        Console.WriteLine($"CANCELED: Reason={details.Reason}");
                        if (details.Reason == CancellationReason.Error)
                        {
                            Console.WriteLine($"CANCELED: ErrorCode={details.ErrorCode}");
                            Console.WriteLine($"CANCELED: ErrorDetails={details.ErrorDetails}");
                            Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                        }
                        return null;

                    default:
                        Console.WriteLine($"Unexpected result reason: {result.Reason}");
                        return null;
                }
            }
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Exception during speech recognition: {ex.Message}");
            return null;
        }
    }
}

/// <summary>
/// Adapts a .NET <see cref="Stream"/> to the Speech SDK's pull-audio callback
/// model; each Read delivers at most one configured chunk of bytes.
/// </summary>
public class ContosoAudioStream : PullAudioInputStreamCallback
{
    private BinaryReader _reader;
    private int _chunkSize;

    public ContosoAudioStream(Stream audioStream, int chunkSize = 1024)
        => (_reader, _chunkSize) = (new BinaryReader(audioStream), chunkSize);

    /// <summary>
    /// Fills <paramref name="buffer"/> with up to min(size, chunk size) bytes.
    /// A return of 0 tells the SDK the audio stream is exhausted.
    /// </summary>
    public override int Read(byte[] buffer, uint size)
    {
        try
        {
            // Cap the request at the configured chunk size.
            int limit = _chunkSize < size ? _chunkSize : (int)size;
            byte[] data = _reader.ReadBytes(limit);
            data.CopyTo(buffer, 0);
            return data.Length;
        }
        catch (EndOfStreamException)
        {
            return 0; // end of audio
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Error reading from stream: {ex.Message}");
            return 0;
        }
    }

    /// <summary>Releases the reader and the stream it wraps.</summary>
    public override void Close()
    {
        _reader?.Close();
        Console.WriteLine("ContosoAudioStream closed.");
    }
}
Asked Mar 25 at 9:40 by Maryam Mirza (5 reputation, 2 bronze badges); edited Mar 26 at 9:59 by VLAZ (29.1k reputation, 9 gold, 63 silver, 84 bronze badges). 0 comments.
Add a comment  | 

1 Answer 1

Reset to default 0

error "NOMATCH: Speech could not be recognized"

I got the same error when I tried with a WAV file with a sample rate of 48,000 Hz.

Use the command below to check the sample rate of your WAV file.

ffmpeg -i <path/to/.wav file>

So, to resolve the issue, I converted my WAV file to 16,000 Hz using the command below and successfully got the speech to text output.

ffmpeg -i "<path/to/.wav file>" -ar 16000 -ac 1 -sample_fmt s16 "<path/to/converted.wav file>"

Code :

using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

/// <summary>
/// Demo program: loads a WAV file into a MemoryStream and transcribes it with
/// the Azure Speech SDK. The audio must be PCM 16 kHz, 16-bit, mono to match
/// the format declared to the SDK; a mismatched sample rate yields NOMATCH.
/// </summary>
class Program
{
    private static string speechKey = "<SpeechKey>";
    // FIX: this placeholder previously read "<SpeechKey>" — it is the service
    // REGION of the Speech resource (e.g. "eastus"), not the key.
    private static string speechRegion = "<SpeechRegion>";

    static async Task Main(string[] args)
    {
        string filePath = "<path/to/.wav file>";
        try
        {
            if (!File.Exists(filePath))
            {
                Console.WriteLine("Error: Audio file not found.");
                return;
            }
            byte[] audioData = File.ReadAllBytes(filePath);
            using (var memoryStream = new MemoryStream(audioData))
            {
                string resultText = await RecognizeSpeechFromStreamAsync(memoryStream);
                Console.WriteLine($"Recognition Result: {resultText}");
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Exception: {ex.Message}");
        }
    }

    /// <summary>
    /// Performs one-shot speech recognition over a raw PCM audio stream
    /// (declared as 16 kHz, 16-bit, mono) and returns the recognized text.
    /// </summary>
    /// <param name="audioStream">PCM audio matching the declared format.</param>
    /// <returns>The transcript, or null when recognition fails or errors.</returns>
    public static async Task<string> RecognizeSpeechFromStreamAsync(Stream audioStream)
    {
        try
        {
            byte channels = 1;
            byte bitsPerSample = 16;
            uint samplesPerSecond = 16000;
            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);
            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
            speechConfig.SpeechRecognitionLanguage = "en-US";

            // Dispose the pull-stream callback and audio config so native SDK
            // resources are released deterministically.
            using (var contosoStream = new ContosoAudioStream(audioStream))
            using (var audioConfig = AudioConfig.FromStreamInput(contosoStream, audioFormat))
            using (var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig))
            {
                Console.WriteLine("Starting speech recognition from stream...");
                var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();
                if (speechRecognitionResult.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"RECOGNIZED: Text={speechRecognitionResult.Text}");
                    return speechRecognitionResult.Text;
                }
                else if (speechRecognitionResult.Reason == ResultReason.NoMatch)
                {
                    // Audio was received but no speech matched (often a format mismatch).
                    Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                    return null;
                }
                else if (speechRecognitionResult.Reason == ResultReason.Canceled)
                {
                    var cancellation = CancellationDetails.FromResult(speechRecognitionResult);
                    Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                    if (cancellation.Reason == CancellationReason.Error)
                    {
                        Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                        Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                        Console.WriteLine($"CANCELED: Did you set the speech resource key and region values?");
                    }
                    return null;
                }
                else
                {
                    Console.WriteLine($"Unexpected result reason: {speechRecognitionResult.Reason}");
                    return null;
                }
            }
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Exception during speech recognition: {ex.Message}");
            return null;
        }
    }
}

/// <summary>
/// Pull-audio callback bridging a <see cref="Stream"/> to the Speech SDK;
/// serves the recognizer bounded chunks of bytes on demand.
/// </summary>
public class ContosoAudioStream : PullAudioInputStreamCallback
{
    // Reader over the caller's audio stream.
    private BinaryReader _reader;
    // Maximum bytes handed to the SDK in a single Read call.
    private int _chunkSize;

    public ContosoAudioStream(Stream audioStream, int chunkSize = 1024)
    {
        _chunkSize = chunkSize;
        _reader = new BinaryReader(audioStream);
    }

    /// <summary>
    /// Delivers up to min(size, chunk size) bytes into <paramref name="buffer"/>;
    /// returning 0 marks end-of-stream for the recognizer.
    /// </summary>
    public override int Read(byte[] buffer, uint size)
    {
        try
        {
            long requested = Math.Min(size, _chunkSize);
            byte[] payload = _reader.ReadBytes((int)requested);
            payload.CopyTo(buffer, 0);
            return payload.Length;
        }
        catch (EndOfStreamException)
        {
            return 0;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Error reading from stream: {ex.Message}");
            return 0;
        }
    }

    /// <summary>Closes the reader and its underlying stream.</summary>
    public override void Close()
    {
        _reader?.Close();
        Console.WriteLine("ContosoAudioStream closed.");
    }
}

Output :

发布者:admin,转转请注明出处:http://www.yc00.com/questions/1744205963a4563103.html

相关推荐

发表回复

评论列表(0条)

  • 暂无评论

联系我们

400-800-8888

在线咨询: QQ交谈

邮件:admin@example.com

工作时间:周一至周五,9:30-18:30,节假日休息

关注微信