当检测到静音(JS)时,如何将前面的audio(从麦克风)提取为缓冲区?

我正在使用Google Cloud API进行语音到文本,并使用NodeJS后端。 该应用程序需要能够侦听语音命令,并将其作为缓冲区传输到后端。 为此,当检测到静音时,我需要发送前一个audio的缓冲区。

任何帮助,将不胜感激。 包括下面的js代码

// Shim the legacy getUserMedia API across vendor prefixes.
if (!navigator.getUserMedia) {
  navigator.getUserMedia =
    navigator.getUserMedia ||
    navigator.webkitGetUserMedia ||
    navigator.mozGetUserMedia ||
    navigator.msGetUserMedia;
}

if (navigator.getUserMedia) {
  navigator.getUserMedia({ audio: true }, success, function (e) {
    alert('Error capturing audio.');
  });
} else {
  alert('getUserMedia not supported in this browser.');
}

// BUGFIX: these were implicit globals (assigned without declaration inside
// success()); declare them explicitly so the script works in strict mode
// while keeping them reachable from the console / other handlers.
let audioContext;
let context;
let audioInput;
let recorder;
let recording = false;

window.startRecording = function () {
  recording = true;
};

window.stopRecording = function () {
  recording = false;
  // window.Stream.end();
};

// getUserMedia success callback: wires the microphone MediaStream into a
// ScriptProcessorNode so each raw PCM buffer can be inspected while
// `recording` is true.
function success(e) {
  audioContext = window.AudioContext || window.webkitAudioContext;
  context = new audioContext();
  // the sample rate is in context.sampleRate
  audioInput = context.createMediaStreamSource(e);
  const bufferSize = 4096; // frames per onaudioprocess callback
  recorder = context.createScriptProcessor(bufferSize, 1, 1);
  recorder.onaudioprocess = function (e) {
    if (!recording) return;
    console.log('recording');
    const left = e.inputBuffer.getChannelData(0);
    // NOTE(review): convertoFloat32ToInt16 is defined elsewhere in the
    // original question; presumably it converts Float32 PCM to Int16 — confirm.
    console.log(convertoFloat32ToInt16(left));
  };
  audioInput.connect(recorder);
  recorder.connect(context.destination);
}

Solutions Collecting From Web of "当检测到静音(JS)时,如何将前面的audio(从麦克风)提取为缓冲区?"

我不太确定这个问题究竟想问什么,所以这个答案只给出一种在音频流(MediaStream)中检测静音的方法。


要检测音频流中的静音,可以使用 AnalyserNode 节点:定期调用它的 getByteFrequencyData 方法,并检查在给定时长内是否存在超过预期电平的声音。

您可以直接使用minDecibels属性设置阈值级别。

 function detectSilence( stream, onSoundEnd = _=>{}, onSoundStart = _=>{}, silence_delay = 500, min_decibels = -80 ) { const ctx = new AudioContext(); const analyser = ctx.createAnalyser(); const streamNode = ctx.createMediaStreamSource(stream); streamNode.connect(analyser); analyser.minDecibels = min_decibels; const data = new Uint8Array(analyser.frequencyBinCount); // will hold our data let silence_start = performance.now(); let triggered = false; // trigger only once per silence event function loop(time) { requestAnimationFrame(loop); // we'll loop every 60th of a second to check analyser.getByteFrequencyData(data); // get current data if (data.some(v => v)) { // if there is data above the given db limit if(triggered){ triggered = false; onSoundStart(); } silence_start = time; // set it to now } if (!triggered && time - silence_start > silence_delay) { onSoundEnd(); triggered = true; } } loop(); } function onSilence() { console.log('silence'); } function onSpeak() { console.log('speaking'); } navigator.mediaDevices.getUserMedia({ audio: true }) .then(stream => { detectSilence(stream, onSilence, onSpeak); // do something else with the stream }) .catch(console.error); 

您可以使用 SpeechRecognition 的 result 事件来确定何时识别出某个单词或短语,例如 ls、cd、pwd 或其他命令;将识别结果的 .transcript 赋给 SpeechSynthesisUtterance,并在其 start 和 end 事件中,对传入了 MediaStream 的 MediaRecorder 对象分别调用 .start() 或 .resume();然后在 dataavailable 事件中使用 FileReader 或 Response.arrayBuffer() 把 Blob 转换为 ArrayBuffer。

我们也可以使用 audiostart/soundstart 与 audioend/soundend 事件来录制用户的实际语音;不过相对于标准系统麦克风实际捕获到的音频起止时刻,这些结束事件未必总能一致地触发。

 <!DOCTYPE html> <html> <head> <title>Speech Recognition Recording</title> </head> <body> <input type="button" value="Stop speech command recognition" id="stop"> <script> navigator.mediaDevices.getUserMedia({ audio: true }) .then(stream => { const recorder = new MediaRecorder(stream); const recognition = new webkitSpeechRecognition(); const synthesis = new SpeechSynthesisUtterance(); const handleResult = e => { recognition.onresult = null; console.log(e.results); const result = e.results[e.results.length - 1]; if (result.isFinal) { const [{transcript}] = result; console.log(transcript); synthesis.text = transcript; window.speechSynthesis.speak(synthesis); } } synthesis.onstart = () => { if (recorder.state === "inactive") { recorder.start() } else { if (recorder.state === "paused") { recorder.resume(); } } } synthesis.onend = () => { recorder.pause(); recorder.requestData(); } recorder.ondataavailable = async(e) => { if (stream.active) { try { const blobURL = URL.createObjectURL(e.data); const request = await fetch(blobURL); const ab = await request.arrayBuffer(); console.log(blobURL, ab); recognition.onresult = handleResult; // URL.revokeObjectURL(blobURL); } catch (err) { throw err } } } recorder.onpause = e => { console.log("recorder " + recorder.state); } recognition.continuous = true; recognition.interimResults = false; recognition.maxAlternatives = 1; recognition.start(); recognition.onend = e => { console.log("recognition ended, stream.active", stream.active); if (stream.active) { console.log(e); // the service disconnects after a period of time recognition.start(); } } recognition.onresult = handleResult; stream.oninactive = () => { console.log("stream ended"); } document.getElementById("stop") .onclick = () => { console.log("stream.active:", stream.active); if (stream && stream.active && recognition) { recognition.abort(); recorder.stop(); for (let track of stream.getTracks()) { track.stop(); } console.log("stream.active:", stream.active); } } }) .catch(err 
=> { console.error(err) }); </script> </body> </html> 

plnkr https://plnkr.co/edit/4DVEg6mhFRR94M5gdaIp?p=preview

最简单的方法是使用 MediaRecorder 的 .start()、.pause()、.resume() 和 .stop() 方法,让用户可以启动、暂停和停止对 navigator.mediaDevices.getUserMedia() 所捕获音频的录制;并在 dataavailable 事件中将得到的 Blob 转换为 ArrayBuffer——如果服务器端 API 期望以这种形式 POST 的话。

 <!DOCTYPE html> <html> <head> <title>User Media Recording</title> </head> <body> <input type="button" value="Start/resume recording audio" id="start"> <input type="button" value="Pause recording audio" id="pause"> <input type="button" value="Stop recording audio" id="stop"> <script> navigator.mediaDevices.getUserMedia({ audio: true }) .then(stream => { const recorder = new MediaRecorder(stream); recorder.ondataavailable = async(e) => { if (stream.active) { try { const blobURL = URL.createObjectURL(e.data); const request = await fetch(blobURL); const ab = await request.arrayBuffer(); // do stuff with `ArrayBuffer` of recorded audio console.log(blobURL, ab); // we do not need the `Blob URL`, we can revoke the object // URL.revokeObjectURL(blobURL); } catch (err) { throw err } } } recorder.onpause = e => { console.log("recorder " + recorder.state); recorder.requestData(); } stream.oninactive = () => { console.log("stream ended"); } document.getElementById("start") .onclick = () => { if (recorder.state === "inactive") { recorder.start(); } else { recorder.resume(); } console.log("recorder.state:", recorder.state); } document.getElementById("pause") .onclick = () => { if (recorder.state === "recording") { recorder.pause(); } console.log("recorder.state:", recorder.state); } document.getElementById("stop") .onclick = () => { if (recorder.state === "recording" || recorder.state === "paused") { recorder.stop(); } for (let track of stream.getTracks()) { track.stop(); } document.getElementById("start").onclick = null; document.getElementById("pause").onclick = null; console.log("recorder.state:", recorder.state , "stream.active", stream.active); } }) .catch(err => { console.error(err) }); </script> </body> </html> 

plnkr https://plnkr.co/edit/7caWYMsvub90G6pwDdQp?p=preview