使用 python 进行实时音频信号处理
Real-time audio signal processing using python
我一直在尝试使用 python 中的 'pyAudio' 模块进行实时音频信号处理。我做的是一个简单的例子:从麦克风读取音频数据,再通过耳机播放出来。我尝试了以下代码(Python 和 Cython 两个版本)。它虽然能工作,但不幸的是播放会卡顿,不够流畅。我怎样才能改进代码,使其流畅运行?我的电脑是 i7,8GB 内存。
Python版本
# Loop-back test: read audio from the default microphone and play it
# straight back on the default output device.
import pyaudio
import numpy as np

RATE = 16000  # sampling rate in Hz
CHUNK = 256   # frames read from the microphone per iteration

p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)

# 20 * RATE / CHUNK chunks of CHUNK frames each add up to 20 seconds of audio.
for i in range(int(20 * RATE / CHUNK)):
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16))

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()
Cython 版本
# Loop-back test (Cython): read audio from the default microphone and play it
# straight back on the default output device.
import pyaudio
import numpy as np

cdef int RATE = 16000   # sampling rate in Hz
cdef int CHUNK = 1024   # frames read from the microphone per iteration
cdef int i

p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)

# 500 chunks of 1024 frames at 16 kHz is 500 * 1024 / 16000 = 32 seconds.
for i in range(500):
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16))

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()
我相信您缺少 CHUNK
作为 player.write
调用的第二个参数。
# Pass the frame count explicitly instead of letting PyAudio compute it.
player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16), CHUNK)
另外,不确定这是不是排版问题,但 player.write 必须缩进到 for 循环内部。
并且根据 pyaudio 网站上的示例,循环次数应写成 RATE / CHUNK * RECORD_SECONDS,而不是 RECORD_SECONDS * RATE / CHUNK。
需要注意:Python 中 * 和 / 的优先级相同,按从左到右的顺序求值,因此这两种写法只有在整数除法(例如 Python 2 中的 /)下结果才会不同;在 Python 3 中二者的结果通常相同。
# 20 * RATE / CHUNK chunks of CHUNK frames each add up to 20 seconds of audio.
for i in range(int(20 * RATE / CHUNK)):
    # Read one chunk from the microphone and write the same number of frames
    # to the output; np.frombuffer replaces the deprecated np.fromstring.
    player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16), CHUNK)

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()
最后,您可能希望将 rate
增加到 44100
,将 CHUNK
增加到 1024
并将 CHANNEL
增加到 2
以获得更好的保真度.
下面的代码将采用默认输入设备,并将记录的内容输出到默认输出设备。
# Callback-mode pass-through: whatever the default input device records is
# sent to the default output device for 20 seconds.
import time

import numpy as np
import pyaudio  # the module name is lowercase; "import PyAudio" fails

p = pyaudio.PyAudio()

CHANNELS = 2
RATE = 44100


def callback(in_data, frame_count, time_info, flag):
    """Return the captured buffer unchanged and ask the stream to continue.

    This is the place to process the signal: convert ``in_data`` to an
    array, modify it, and return the processed bytes instead of ``in_data``.
    """
    # Example conversion for processing:
    # audio_data = np.frombuffer(in_data, dtype=np.float32)
    return in_data, pyaudio.paContinue


stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                input=True,
                stream_callback=callback)

stream.start_stream()

# The callback always returns paContinue, so the stream stays active until it
# is stopped explicitly: let it run for 20 seconds, then stop it, which also
# ends this loop.
while stream.is_active():
    time.sleep(20)
    stream.stop_stream()
    print("Stream is stopped")

stream.close()
p.terminate()
这段代码会运行 20 秒然后停止。回调方法 callback 是您可以处理信号的地方:
audio_data = np.frombuffer(in_data, dtype=np.float32)
return in_data
是您将后处理过的数据发送回输出设备的地方。
注意块的默认参数为 1024,如 PyAudio 文档中所述:
http://people.csail.mit.edu/hubert/pyaudio/docs/#pyaudio.PyAudio.open
我正在做一个类似的项目。我修改了你的代码,现在卡顿消失了。块越大,延迟越大,所以我把块大小设得比较小。
# Low-latency loop-back: read small chunks from the default microphone and
# write each one straight to the default output device.
import pyaudio
import numpy as np

CHUNK = 2**5   # frames per read/write; kept small to keep latency low
RATE = 44100   # sampling rate in Hz
LEN = 10       # duration of the loop-back in seconds

p = pyaudio.PyAudio()

stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)

# LEN * RATE / CHUNK chunks of CHUNK frames each add up to LEN seconds.
for i in range(int(LEN * RATE / CHUNK)):
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
    player.write(data, CHUNK)

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()
我一直在尝试使用 python 中的 'pyAudio' 模块进行实时音频信号处理。我做的是一个简单的例子:从麦克风读取音频数据,再通过耳机播放出来。我尝试了以下代码(Python 和 Cython 两个版本)。它虽然能工作,但不幸的是播放会卡顿,不够流畅。我怎样才能改进代码,使其流畅运行?我的电脑是 i7,8GB 内存。
Python版本
# Loop-back test: read audio from the default microphone and play it
# straight back on the default output device.
import pyaudio
import numpy as np

RATE = 16000  # sampling rate in Hz
CHUNK = 256   # frames read from the microphone per iteration

p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)

# 20 * RATE / CHUNK chunks of CHUNK frames each add up to 20 seconds of audio.
for i in range(int(20 * RATE / CHUNK)):
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16))

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()
Cython 版本
# Loop-back test (Cython): read audio from the default microphone and play it
# straight back on the default output device.
import pyaudio
import numpy as np

cdef int RATE = 16000   # sampling rate in Hz
cdef int CHUNK = 1024   # frames read from the microphone per iteration
cdef int i

p = pyaudio.PyAudio()
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)

# 500 chunks of 1024 frames at 16 kHz is 500 * 1024 / 16000 = 32 seconds.
for i in range(500):
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16))

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()
我相信您缺少 CHUNK
作为 player.write
调用的第二个参数。
# Pass the frame count explicitly instead of letting PyAudio compute it.
player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16), CHUNK)
另外,不确定这是不是排版问题,但 player.write 必须缩进到 for 循环内部。
并且根据 pyaudio 网站上的示例,循环次数应写成 RATE / CHUNK * RECORD_SECONDS,而不是 RECORD_SECONDS * RATE / CHUNK。
需要注意:Python 中 * 和 / 的优先级相同,按从左到右的顺序求值,因此这两种写法只有在整数除法(例如 Python 2 中的 /)下结果才会不同;在 Python 3 中二者的结果通常相同。
# 20 * RATE / CHUNK chunks of CHUNK frames each add up to 20 seconds of audio.
for i in range(int(20 * RATE / CHUNK)):
    # Read one chunk from the microphone and write the same number of frames
    # to the output; np.frombuffer replaces the deprecated np.fromstring.
    player.write(np.frombuffer(stream.read(CHUNK), dtype=np.int16), CHUNK)

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()
最后,您可能希望将 rate
增加到 44100
,将 CHUNK
增加到 1024
并将 CHANNEL
增加到 2
以获得更好的保真度.
下面的代码将采用默认输入设备,并将记录的内容输出到默认输出设备。
# Callback-mode pass-through: whatever the default input device records is
# sent to the default output device for 20 seconds.
import time

import numpy as np
import pyaudio  # the module name is lowercase; "import PyAudio" fails

p = pyaudio.PyAudio()

CHANNELS = 2
RATE = 44100


def callback(in_data, frame_count, time_info, flag):
    """Return the captured buffer unchanged and ask the stream to continue.

    This is the place to process the signal: convert ``in_data`` to an
    array, modify it, and return the processed bytes instead of ``in_data``.
    """
    # Example conversion for processing:
    # audio_data = np.frombuffer(in_data, dtype=np.float32)
    return in_data, pyaudio.paContinue


stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                input=True,
                stream_callback=callback)

stream.start_stream()

# The callback always returns paContinue, so the stream stays active until it
# is stopped explicitly: let it run for 20 seconds, then stop it, which also
# ends this loop.
while stream.is_active():
    time.sleep(20)
    stream.stop_stream()
    print("Stream is stopped")

stream.close()
p.terminate()
这段代码会运行 20 秒然后停止。回调方法 callback 是您可以处理信号的地方:
audio_data = np.frombuffer(in_data, dtype=np.float32)
return in_data
是您将后处理过的数据发送回输出设备的地方。
注意块的默认参数为 1024,如 PyAudio 文档中所述: http://people.csail.mit.edu/hubert/pyaudio/docs/#pyaudio.PyAudio.open
我正在做一个类似的项目。我修改了你的代码,现在卡顿消失了。块越大,延迟越大,所以我把块大小设得比较小。
# Low-latency loop-back: read small chunks from the default microphone and
# write each one straight to the default output device.
import pyaudio
import numpy as np

CHUNK = 2**5   # frames per read/write; kept small to keep latency low
RATE = 44100   # sampling rate in Hz
LEN = 10       # duration of the loop-back in seconds

p = pyaudio.PyAudio()

stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)
player = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, output=True,
                frames_per_buffer=CHUNK)

# LEN * RATE / CHUNK chunks of CHUNK frames each add up to LEN seconds.
for i in range(int(LEN * RATE / CHUNK)):
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
    player.write(data, CHUNK)

# Release both streams (input and output) before shutting PortAudio down.
stream.stop_stream()
stream.close()
player.stop_stream()
player.close()
p.terminate()