I am building a webpage that records audio from the user's device and sends it to Microsoft's Cognitive Speech Services for speech-to-text conversion. So far, I have been able to create and play back .ogg files made in JavaScript, but I need to get the files in .wav format. Blob type audio/wav can't be relied on since not all browsers support it (mine doesn't, at least). The blobs are sent to and stored by a Django server. When I try to open these files with PySoundFile, I get an error that says "File contains data in an unknown format". The blobs are being created with new Blob(chunks, { type: 'audio/ogg; codecs=opus' }) and saved using django.db.FileField. The blob chunks come from MediaRecorder.ondataavailable.
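For context, the MediaRecorder flow described above looked roughly like this. This is a minimal sketch reconstructed from the description; uploadBlob is a hypothetical upload helper, not part of my actual code.

let chunks = [];
navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
  let recorder = new MediaRecorder(stream);
  // Each dataavailable event delivers a chunk of encoded audio.
  recorder.ondataavailable = (e) => chunks.push(e.data);
  recorder.onstop = () => {
    // This blob is what gets sent to and stored by the Django server.
    let blob = new Blob(chunks, { type: 'audio/ogg; codecs=opus' });
    uploadBlob(blob); // hypothetical helper
  };
  recorder.start();
});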
UPDATE: I gave up on using MediaRecorder and opted for ScriptProcessorNode instead. Again, Firefox works, but Chrome does not. It seems that Chrome takes a small portion from the end of the audio and repeats it for the full length of the recording. Here is the code that I used, which was based on Matt Diamond's work at github.com/mattdiamond/Recorderjs. A demo using his work can be seen at webaudiodemos.appspot.com/AudioRecorder/index.html, which works on both Firefox and Chrome for me. Also, my original code was in a class, but I did not want to include the entire class. I apologize if I made any syntactic errors in translation.
let recBuffers = [[], []];
let recLength = 0;
let numChannels = 2;
let listening = false;
let timeout = null;
let maxTime = 5000; // recording cap in ms; defined elsewhere in my original class, value here is a placeholder
let audioContext = null; // hoisted from init() so encodeWAV() can read the sample rate
let constraints = {
  audio: true
};
let failedToGetUserMedia = false;

if (navigator.getUserMedia) {
  navigator.getUserMedia(constraints, (stream) => {
    init(stream);
  }, (err) => {
    alert('Unable to access audio.\n\n' + err);
    console.log('The following error occurred: ' + err);
    failedToGetUserMedia = true;
  });
}
else if (navigator.mediaDevices.getUserMedia) {
  navigator.mediaDevices.getUserMedia(constraints).then((stream) => {
    init(stream);
  }).catch((err) => {
    alert('Unable to access audio.\n\n' + err);
    console.log('The following error occurred: ' + err);
    failedToGetUserMedia = true;
  });
}
else failedToGetUserMedia = true;

function beginRecording() {
  recBuffers = [[], []];
  recLength = 0;
  listening = true;
  timeout = setTimeout(() => {
    endRecording();
  }, maxTime);
}

function endRecording() {
  clearTimeout(timeout);
  timeout = null;
  exportWAV();
}

function init(stream) {
  audioContext = new AudioContext();
  let source = audioContext.createMediaStreamSource(stream);
  let context = source.context;
  // createJavaScriptNode is the legacy name for createScriptProcessor
  let node = (context.createScriptProcessor || context.createJavaScriptNode).call(context, 4096, numChannels, numChannels);
  node.onaudioprocess = (e) => {
    if (!listening) return;
    for (var i = 0; i < numChannels; i++) {
      recBuffers[i].push(e.inputBuffer.getChannelData(i));
    }
    recLength += recBuffers[0][0].length; // every callback delivers chunks of the same size (4096 frames)
  };
  source.connect(node);
  node.connect(context.destination);
}

function mergeBuffers(buffers, len) {
  // Concatenate the per-callback chunks for one channel into a single Float32Array.
  let result = new Float32Array(len);
  let offset = 0;
  for (var i = 0; i < buffers.length; i++) {
    result.set(buffers[i], offset);
    offset += buffers[i].length;
  }
  return result;
}

function interleave(inputL, inputR) {
  // Interleave left/right samples: L0 R0 L1 R1 ...
  let len = inputL.length + inputR.length;
  let result = new Float32Array(len);
  let index = 0;
  let inputIndex = 0;
  while (index < len) {
    result[index++] = inputL[inputIndex];
    result[index++] = inputR[inputIndex];
    inputIndex++;
  }
  return result;
}

function exportWAV() {
  let buffers = [];
  for (var i = 0; i < numChannels; i++) {
    buffers.push(mergeBuffers(recBuffers[i], recLength));
  }
  let interleaved = numChannels == 2 ? interleave(buffers[0], buffers[1]) : buffers[0];
  let dataView = encodeWAV(interleaved);
  let blob = new Blob([ dataView ], { type: 'audio/wav' });
  blob.name = Math.floor((new Date()).getTime() / 1000) + '.wav';
  listening = false;
  return blob;
}

function floatTo16BitPCM(output, offset, input) {
  // Clamp each float sample to [-1, 1] and scale to signed 16-bit little-endian.
  for (var i = 0; i < input.length; i++, offset += 2) {
    var s = Math.max(-1, Math.min(1, input[i]));
    output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
  }
}

function writeString(view, offset, string) {
  for (var i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}

function encodeWAV(samples) {
  var buffer = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buffer);
  /* RIFF identifier */
  writeString(view, 0, 'RIFF');
  /* file length */
  view.setUint32(4, 36 + samples.length * 2, true);
  /* RIFF type */
  writeString(view, 8, 'WAVE');
  /* format chunk identifier */
  writeString(view, 12, 'fmt ');
  /* format chunk length */
  view.setUint32(16, 16, true);
  /* sample format (raw) */
  view.setUint16(20, 1, true);
  /* channel count */
  view.setUint16(22, numChannels, true);
  /* sample rate */
  view.setUint32(24, audioContext.sampleRate, true);
  /* byte rate (sample rate * block align) */
  view.setUint32(28, audioContext.sampleRate * numChannels * 2, true);
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, numChannels * 2, true);
  /* bits per sample */
  view.setUint16(34, 16, true);
  /* data chunk identifier */
  writeString(view, 36, 'data');
  /* data chunk length */
  view.setUint32(40, samples.length * 2, true);
  floatTo16BitPCM(view, 44, samples);
  return view;
}

if (!failedToGetUserMedia) beginRecording();
UPDATE: I have confirmed that when the value of Chrome's buffers is provided as input to the interleave function on Firefox, the output is the same as Chrome's output. This suggests that Chrome is not filling recBuffers with the correct values. Indeed, when I look at recBuffers on Chrome, each channel is full of alternating, repeated lists. For example:
recBuffers = [[
  [2, 3],
  [7, 1],
  [2, 3],
  [7, 1],
  [2, 3],
  [7, 1],
  [2, 3],
  [7, 1],
  [2, 3],
  [7, 1]
], [
  [5, 4],
  [6, 8],
  [5, 4],
  [6, 8],
  [5, 4],
  [6, 8],
  [5, 4],
  [6, 8],
  [5, 4],
  [6, 8]
]]
Of course, the actual values are different. This is just an example to illustrate the point.
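One quick way to check whether the stored chunks really point at recycled memory is to compare them from the console. This diagnostic is my addition, not part of the original post:

// If Chrome hands back views into a buffer it reuses between callbacks,
// stored chunks either alias the same array or end up with identical contents.
let a = recBuffers[0][0];
let b = recBuffers[0][2];
console.log('same object:', a === b);
console.log('same contents:', a.length === b.length && a.every((v, i) => v === b[i]));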
- Rather than converting later, use ScriptProcessorNode to record raw PCM in the first place. – Brad Commented Nov 10, 2019 at 2:44
- I'd like to record it in .wav format in the first place, but my browser does not support the audio/wav blob type and, according to developer.mozilla.org/en-US/docs/Web/API/ScriptProcessorNode, ScriptProcessorNode is deprecated. Thanks for the suggestion. If you have any others, please let me know. – hutch90 Commented Nov 10, 2019 at 2:46
- Your browser doesn't care what type the blob is... what you're actually referring to is that your browser doesn't support audio/wav with MediaRecorder. And, while ScriptProcessorNode is deprecated... barely so. They're not going to remove it until AudioWorkletNode is better supported, at which time you could just use that instead. (Or, more likely, audio/wav will be supported in your browser first.) It's going to be many years before they actually break ScriptProcessorNode. – Brad Commented Nov 10, 2019 at 2:48
- Can you provide any insight as to how to record the audio in wav format with ScriptProcessorNode? Setting the Blob type to 'audio/wav; codecs=ms_pcm' didn't work, I couldn't get the example at Mozilla's ScriptProcessorNode to work, and I've only been able to get Chrome to play the audio files. PySoundFile can't open them and neither can Windows Media Player. – hutch90 Commented Nov 12, 2019 at 3:26
- Handle the audioprocess event. e.inputBuffer.getChannelData(channel) will get you the raw 32-bit PCM data. From there, you can write it to your own WAV file as-is (don't forget the header: soundfile.sapp.org/doc/WaveFormat), or convert it from 32-bit float to something like 16-bit little-endian. Also, don't forget to check the sample rate of the audio context and include that in your WAV file. – Brad Commented Nov 12, 2019 at 3:34
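Since Brad points to AudioWorkletNode as the eventual replacement for ScriptProcessorNode, here is a rough sketch of the same recording tap built on it. This is my own illustration, not from the original thread: the file name recorder-processor.js and the processor name are hypothetical, and it assumes a browser with AudioWorklet support.

// recorder-processor.js — runs on the audio rendering thread.
class RecorderProcessor extends AudioWorkletProcessor {
  process(inputs, outputs, parameters) {
    const input = inputs[0]; // array of Float32Arrays, one per channel
    if (input.length > 0) {
      // Copy each channel's 128-frame block before posting it, since the
      // engine may reuse the underlying buffers.
      this.port.postMessage(input.map((ch) => new Float32Array(ch)));
    }
    return true; // keep the processor alive
  }
}
registerProcessor('recorder-processor', RecorderProcessor);

// Main thread: collect the blocks much like recBuffers above.
async function initWorklet(stream) {
  const ctx = new AudioContext();
  await ctx.audioWorklet.addModule('recorder-processor.js');
  const source = ctx.createMediaStreamSource(stream);
  const node = new AudioWorkletNode(ctx, 'recorder-processor');
  node.port.onmessage = (e) => {
    for (let i = 0; i < e.data.length; i++) recBuffers[i].push(e.data[i]);
    recLength += e.data[0].length;
  };
  source.connect(node);
}

Because the copies are made inside process(), the buffer-reuse problem described in the update above cannot occur.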
1 Answer
Originally, I was using MediaRecorder to get audio and create a Blob from said audio with type audio/wav. That was not working in Chrome, but it was in Firefox. I gave up on that and started working with ScriptProcessorNode. Again, it worked on Firefox, but not Chrome. After some debugging, it became clear that on Chrome, recBuffers was getting filled with alternating, repeated lists. The likely cause is that Chrome reuses the buffer it passes to onaudioprocess, so the arrays returned by getChannelData are views into memory that later callbacks overwrite; copying the data with spread syntax solved it. Changing a line in onaudioprocess from
this.recBuffers[i].push(e.inputBuffer.getChannelData(i));
to
this.recBuffers[i].push([...e.inputBuffer.getChannelData(i)]);
worked.
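For reference, applied to the non-class code in the question, the corrected handler looks like the sketch below. Copying with new Float32Array(...) (or .slice()) works just as well as spread syntax and keeps each chunk as a typed array instead of a plain array:

node.onaudioprocess = (e) => {
  if (!listening) return;
  for (let i = 0; i < numChannels; i++) {
    // Copy the samples: getChannelData() may return a view into a buffer
    // that Chrome reuses for the next callback, so storing the view itself
    // leaves every chunk pointing at the same, constantly overwritten memory.
    recBuffers[i].push(new Float32Array(e.inputBuffer.getChannelData(i)));
  }
  recLength += e.inputBuffer.length;
};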