格式化并将无缝音频从 JS 发送到本地 PyAudio

Format and send seamless audio from JS to local PyAudio

我正在使用 Unix 套接字,把来自麦克风的音频(通过 Electron NodeJS 应用程序)流式传输给一个监听该套接字的 Python 程序,再由它把音频交给 PyAudio 播放。

电子应用程序 getUserMedia() => WAV 格式 => NodeJS 套接字 => Unix 套接字 => Python 套接字 => PyAudio

我可以正常使用,但每个块开始或结束时都会发出持续的咔哒声。我应该从哪里开始调试它?这是代码:

NodeJS 应用程序 (发件人)

// Node built-in IPC/TCP socket client, used to reach the Python receiver.
var net = require('net');
// NOTE(review): node-wav appears unused in this file — confirm before removing.
const nodeWav = require("node-wav");

// Web Audio plumbing; all populated once the socket connection is up.
var recorder = null;      // ScriptProcessorNode that delivers raw PCM chunks
var volume = null;        // GainNode between the mic source and the recorder
var audioInput = null;    // MediaStreamAudioSourceNode wrapping the mic stream
var sampleRate = null;    // AudioContext sample rate, stamped into each WAV header
var audioContext = null;  // AudioContext constructor (vendor-prefixed fallback)
var context = null;       // live AudioContext instance
var outputElement = document.getElementById('output');
var outputString;
var bufferSize = 1024;    // frames per ScriptProcessor callback (and per socket chunk)

var mediaSourceIn;        // MediaStream handed to us by getUserMedia()

// callback for navigator.mediaDevices.getUserMedia()
/**
 * Success callback for navigator.mediaDevices.getUserMedia().
 * Stashes the incoming MediaStream, then opens the Unix-domain socket
 * to the Python receiver (recording starts once the socket connects).
 * @param {MediaStream} stream - live microphone stream
 */
function audioReceiver(stream) {
    mediaSourceIn = stream;
    initSocket();
}

var audioSocket;

/**
 * Connects to the Unix-domain socket the Python receiver listens on.
 *
 * FIX: net.connect() returns a net.Socket, not a Promise, so the
 * original `.catch(...)` call threw `TypeError: ... .catch is not a
 * function` the moment this ran. Connection failures on a net.Socket
 * are reported via the 'error' event instead.
 */
function initSocket() {
  audioSocket = net.connect('/tmp/audio_input', connected);
  audioSocket.on('error', function(err) {
    console.log("Could not connect...");
    console.log(err);
  });
}

/**
 * 'connect' handler for net.connect(). `this` is the freshly-connected
 * net.Socket; keep a module-level reference to it, then start the
 * microphone capture pipeline.
 */
function connected() {
  audioSocket = this;
  console.log("CONNECTED TO UNIX SOCKET!");
  createRecordingTask();
}

// Builds the Web Audio capture graph:
//   mic (MediaStream) -> GainNode -> ScriptProcessorNode -> destination
// Each ScriptProcessor callback packages one bufferSize-frame stereo chunk
// as a standalone WAV file (header + PCM) and writes it to the socket.
// NOTE(review): ScriptProcessorNode is deprecated in favour of AudioWorklet.
// NOTE(review): every chunk is sent with its own 44-byte WAV header (see
// createAudioBuffer); the receiver plays those header bytes as PCM, which
// is the most likely source of the click at each chunk boundary.
function createRecordingTask() {
  // creates the audio context (vendor-prefixed fallback for older WebKit)
    audioContext = window.AudioContext || window.webkitAudioContext;
    context = new audioContext();

    // retrieve the current sample rate to be used for WAV packaging
    sampleRate = context.sampleRate;

    // creates a gain node
    volume = context.createGain();

    // creates an audio node from the microphone incoming stream
    audioInput = context.createMediaStreamSource(mediaSourceIn);

    // connect the stream to the gain node
    audioInput.connect(volume);

    /* From the spec: This value controls how frequently the audioprocess event is
    dispatched and how many sample-frames need to be processed each call.
    Lower values for buffer size will result in a lower (better) latency.
    Higher values will be necessary to avoid audio breakup and glitches */
    recorder = context.createScriptProcessor(bufferSize, 2, 2);

    // Fires once per bufferSize frames: copy both channels (getChannelData
    // returns views into a buffer the browser reuses, hence the copies),
    // wrap them in a WAV chunk and push it down the socket.
    recorder.onaudioprocess = function(e){
        console.log ('recording');
        var left = e.inputBuffer.getChannelData (0);
        var right = e.inputBuffer.getChannelData (1);
        var bf = createAudioBuffer(
          new Float32Array (left),
          new Float32Array (right));

        upload(bf);
    }

    // we connect the recorder; connecting to destination is required for
    // onaudioprocess to keep firing in some browsers
    volume.connect (recorder);
    recorder.connect (context.destination);
}

/**
 * Copies a channel buffer into a fresh Float32Array of the given length,
 * zero-padding the tail when the input is shorter.
 *
 * FIX: every call site passes a length as a second argument
 * (`mergeBuffers(channel, bufferSize)`), but the original signature
 * silently ignored it and always used the global `bufferSize`. Honouring
 * the parameter (with the global as the default) keeps behaviour
 * identical for current callers while making the helper usable with
 * other sizes.
 *
 * @param {Float32Array} channelBuffer - samples to copy
 * @param {number} [length=bufferSize] - size of the returned buffer
 * @returns {Float32Array} zero-padded copy of the input
 */
function mergeBuffers(channelBuffer, length = bufferSize){
  var result = new Float32Array(length);
  result.set(channelBuffer);
  return result;
}

/**
 * Interleaves two mono channels into a single L/R/L/R Float32Array,
 * the frame order a stereo WAV "data" chunk expects.
 * @param {Float32Array} leftChannel
 * @param {Float32Array} rightChannel
 * @returns {Float32Array} interleaved stereo samples
 */
function interleave(leftChannel, rightChannel){
  const total = leftChannel.length + rightChannel.length;
  const out = new Float32Array(total);

  for (let frame = 0, write = 0; write < total; frame++) {
    out[write++] = leftChannel[frame];
    out[write++] = rightChannel[frame];
  }
  return out;
}

/**
 * Writes a string into a DataView one byte per UTF-16 code unit,
 * starting at `offset`. Used for the ASCII tags of the WAV header
 * ('RIFF', 'WAVE', 'fmt ', 'data').
 * @param {DataView} view
 * @param {number} offset
 * @param {string} string
 */
function writeUTFBytes(view, offset, string){
  string.split('').forEach(function(ch, i) {
    view.setUint8(offset + i, ch.charCodeAt(0));
  });
}

/**
 * Packages one stereo chunk as a complete, standalone WAV file:
 * 44-byte RIFF/WAVE header followed by interleaved little-endian
 * 16-bit PCM. Reads the globals `bufferSize` (frames per chunk) and
 * `sampleRate` (stamped into the header).
 *
 * FIX: the RIFF ChunkSize field is defined as the file size minus
 * 8 bytes, i.e. 36 + data bytes; the previous code wrote 44 + data,
 * overstating it by 8. Also renamed the local `volume` to `gain` — it
 * shadowed the module-level GainNode variable of the same name.
 *
 * NOTE(review): a full header is emitted for EVERY chunk; the Python
 * receiver plays those 44 bytes as PCM, which is the most likely cause
 * of the click at each chunk boundary. Consider sending raw PCM after
 * an initial header instead.
 *
 * @param {Float32Array} leftchannel  - left-channel samples in [-1, 1]
 * @param {Float32Array} rightchannel - right-channel samples in [-1, 1]
 * @returns {Buffer} WAV header + PCM payload, ready for socket.write()
 */
function createAudioBuffer(leftchannel, rightchannel) {

  // Flatten each channel into a bufferSize-long, zero-padded buffer.
  var leftBuffer = mergeBuffers ( leftchannel, bufferSize );
  var rightBuffer = mergeBuffers ( rightchannel, bufferSize );

  // Interleave to the L/R/L/R frame order WAV expects.
  var interleaved = interleave ( leftBuffer, rightBuffer );

  // 44-byte header + 2 bytes per 16-bit sample.
  var dataBytes = interleaved.length * 2;
  var buffer = new ArrayBuffer(44 + dataBytes);
  var view = new DataView(buffer);

  // RIFF chunk descriptor
  writeUTFBytes(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataBytes, true);  // ChunkSize = file size - 8
  writeUTFBytes(view, 8, 'WAVE');
  // "fmt " sub-chunk: linear PCM, stereo, 16 bits per sample
  writeUTFBytes(view, 12, 'fmt ');
  view.setUint32(16, 16, true);              // fmt sub-chunk size
  view.setUint16(20, 1, true);               // audio format 1 = PCM
  view.setUint16(22, 2, true);               // stereo (2 channels)
  view.setUint32(24, sampleRate, true);      // sample rate
  view.setUint32(28, sampleRate * 4, true);  // byte rate = rate * block align
  view.setUint16(32, 4, true);               // block align = 2 ch * 2 bytes
  view.setUint16(34, 16, true);              // bits per sample
  // "data" sub-chunk
  writeUTFBytes(view, 36, 'data');
  view.setUint32(40, dataBytes, true);

  // Convert float samples to 16-bit PCM with a fixed 0.6 attenuation
  // (headroom against clipping).
  var gain = 0.6;
  var index = 44;
  for (var i = 0; i < interleaved.length; i++){
      view.setInt16(index, interleaved[i] * (0x7FFF * gain), true);
      index += 2;
  }
  // our final binary blob
  return Buffer.from(view.buffer);
}


/**
 * Sends one WAV-framed chunk over the Unix socket, or logs a warning
 * when the socket is no longer writable.
 * @param {Buffer} thatAudio - binary chunk produced by createAudioBuffer()
 */
function upload(thatAudio) {
  if (!audioSocket.writable) {
    console.log("DISCONNECTED!");
    return;
  }
  audioSocket.write(thatAudio);
}

Python 程序 (接收器):

"""Unix-socket audio receiver: reads fixed-size WAV-framed chunks from the
Electron sender and plays the PCM payload through PyAudio.

Fixes over the original:
  * Python 3 print() calls (the original used Python-2 print statements).
  * recv() on a SOCK_STREAM socket may return fewer bytes than requested,
    so chunks are reassembled with recv_exactly() to keep framing intact.
  * The sender prepends a 44-byte WAV header to EVERY chunk; playing those
    header bytes as PCM is what produced the click at each chunk boundary.
    The header is now stripped before stream.write().
  * An empty recv() (sender disconnected) used to busy-loop forever and the
    cleanup code was unreachable; the loop now exits and cleanup runs.
"""
import socket
import os
import pyaudio
from threading import Thread

sockfile = "/tmp/audio_input"

FORMAT = pyaudio.paInt16
CHUNK = 1024            # frames per chunk (matches bufferSize on the JS side)
CHANNELS = 2
RATE = 44100
WAV_HEADER_BYTES = 44   # RIFF/WAVE header prepended to every incoming chunk
frames = []

# Remove a stale socket file left over from a previous run.
if os.path.exists(sockfile):
    os.remove(sockfile)

print("Opening socket...")
server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
server.bind(sockfile)
server.listen(5)
conn, addr = server.accept()

print("Creating PyAudio stream...")
p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                frames_per_buffer=CHUNK,
                )


def recv_exactly(sock, nbytes):
    """Read exactly nbytes from a stream socket; return b'' on EOF.

    SOCK_STREAM sockets have no message boundaries, so a single recv()
    may return a partial chunk; looping preserves the sender's framing.
    """
    parts = []
    remaining = nbytes
    while remaining > 0:
        data = sock.recv(remaining)
        if not data:
            return b''
        parts.append(data)
        remaining -= len(data)
    return b''.join(parts)


print("Listening...")
singleChunkSizeBytes = WAV_HEADER_BYTES + (CHUNK * CHANNELS * 2)
print("%d bytes at a time" % singleChunkSizeBytes)
try:
    while True:
        soundData = recv_exactly(conn, singleChunkSizeBytes)
        if not soundData:
            break  # sender closed the connection
        # Strip the per-chunk WAV header and play only the PCM payload;
        # playing the header bytes caused an audible click every chunk.
        stream.write(soundData[WAV_HEADER_BYTES:], CHUNK)
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()
    conn.close()
    server.close()
    os.remove(sockfile)

首先你应该检查 stream.write() 是否导致缓冲区欠载(underrun)。这可以通过 exception_on_underflow 选项来检测(参见 PyAudio 的 Stream.write() 文档)。如果你想要一个不抛出异常的 write() 版本,可以试试 sounddevice 模块(参见其 write() 文档)。

如果有 underrun,这可能意味着套接字提供数据的速度不够快。在这种情况下,您可能应该在接收端实现一些缓冲,例如使用 queue.Queue.

如果没有 underrun,那么问题可能出在发送方……

您可以尝试发送新的 Blob 对象

// FIX: the original snippet used a TypeScript annotation
// (`let wavBlob: Blob = ...`), which is a syntax error in plain
// JavaScript, and it built wavBlob but still uploaded `bf`.
recorder.onaudioprocess = function(e){
    console.log ('recording');
    var left = e.inputBuffer.getChannelData (0);
    var right = e.inputBuffer.getChannelData (1);
    var bf = createAudioBuffer(
      new Float32Array (left),
      new Float32Array (right));
    // Wrap the chunk in a typed Blob and send that instead of the raw buffer.
    let wavBlob = new Blob([bf], { type: 'audio/wav' });
    upload(wavBlob);
}