Store audio from QAudioInput and pass it to SciPy FFT
How do I get audio input from QAudioInput in real time, store it in a NumPy array, and pass it to SciPy's FFT? What I have tried:
from PyQt5.QtCore import QByteArray
from PyQt5.QtMultimedia import QAudioDeviceInfo, QAudioFormat, QAudioInput
from PyQt5.QtWidgets import QMainWindow
import sys

class Window(QMainWindow):
    def __init__(self):
        super().__init__()
        info = QAudioDeviceInfo()
        input_device = info.defaultInputDevice()
        if input_device.isNull():
            # If no available device is found, display an error
            print("There is no audio input device available.")
            exit(-1)
        audio_format = QAudioFormat()
        audio_format.setSampleRate(44100)
        audio_format.setSampleSize(8)
        audio_format.setChannelCount(1)
        audio_format.setCodec("audio/pcm")
        audio_format.setSampleType(QAudioFormat.UnSignedInt)
        if sys.byteorder == "little":
            audio_format.setByteOrder(QAudioFormat.LittleEndian)
        else:
            audio_format.setByteOrder(QAudioFormat.BigEndian)
        self.audioInput = QAudioInput(input_device, audio_format, self)
        self.ioDevice = self.audioInput.start()
        self.ioDevice.readyRead.connect(self.read_audio)

    def read_audio(self):
        data: QByteArray = self.ioDevice.readAll()
        print(data.toUInt())  # Prints (0, False), which means the conversion failed
data.toUInt() tries to convert the entire byte array into a single uint value, which is not what you want. To get the individual sample values, you can use numpy.frombuffer or struct.unpack:
import numpy

def read_audio(self):
    data = self.ioDevice.readAll()
    values = numpy.frombuffer(data.data(), dtype=numpy.uint8)
or
import struct

def read_audio(self):
    data = self.ioDevice.readAll()
    fmt = "@{}B".format(data.size())
    values = struct.unpack(fmt, data.data())
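With the 8-bit unsigned format configured above, the samples arrive in the range 0..255 with silence at 128, so it is usually worth recentering them before handing them to SciPy. Below is only a minimal sketch of that hand-off, assuming the numpy route and the same Window class as in the question; the scaling constants follow from the 8-bit unsigned format, not from the original code.

import numpy
from scipy.fft import fft

def read_audio(self):
    data = self.ioDevice.readAll()
    values = numpy.frombuffer(data.data(), dtype=numpy.uint8)
    # Recenter 0..255 around zero and scale to -1.0..1.0 before the FFT
    samples = (values.astype(numpy.float64) - 128.0) / 128.0
    spectrum = fft(samples)  # complex spectrum from SciPy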
Inspired by the official Audio Example, I created a QIODevice that receives the data. The example below takes the last N samples every T seconds, computes their FFT and displays the result with matplotlib.
import sys
import collections
from functools import cached_property

from PyQt5.QtCore import QIODevice, QObject, pyqtSignal, QTimer
from PyQt5.QtMultimedia import QAudioDeviceInfo, QAudioFormat, QAudioInput
from PyQt5.QtWidgets import QApplication, QMainWindow

from matplotlib.backends.backend_qt5agg import FigureCanvas
from matplotlib.figure import Figure

from scipy.fft import fft, fftfreq
import numpy as np

FS = 44100
SAMPLE_COUNT = 2 * 1000


class AudioDevice(QIODevice):
    data_changed = pyqtSignal(list, name="dataChanged")

    def __init__(self, interval=1000, parent: QObject = None):
        super().__init__(parent)
        # Ring buffer that always holds the last SAMPLE_COUNT samples
        self.m_buffer = collections.deque(
            [0 for _ in range(SAMPLE_COUNT)], maxlen=SAMPLE_COUNT
        )
        self.timer.timeout.connect(self.send_data)
        self.timer.setInterval(interval)
        self.timer.start()

    @cached_property
    def timer(self):
        return QTimer()

    def send_data(self):
        self.data_changed.emit(list(self.m_buffer))

    def readData(self, data, max_size):
        return -1

    def writeData(self, data):
        max_size = len(data)
        resolution = 4
        start = 0
        available_samples = int(max_size) // resolution
        if available_samples < self.m_buffer.maxlen:
            start = self.m_buffer.maxlen - available_samples
        pos = 0
        for _ in range(start, self.m_buffer.maxlen):
            # Map 8-bit unsigned samples (0..255, silence at 128) to -1.0..1.0
            y = (1.0 * (data[pos] - 128)) / 128.0
            self.m_buffer.append(y)
            pos += resolution
        return (self.m_buffer.maxlen - start) * resolution


class PlotWidget(QMainWindow):
    def __init__(self, parent=None):
        super().__init__(parent)
        self.canvas = FigureCanvas(Figure(figsize=(5, 3)))
        self.setCentralWidget(self.canvas)
        self.ax = self.canvas.figure.subplots()
        self._line = None

    def update_data(self, data):
        T = 1 / FS
        N = SAMPLE_COUNT
        yf = fft(data)
        xf = fftfreq(N, T)[: N // 2]
        x = xf
        y = 2.0 / N * np.abs(yf[0 : N // 2])
        if self._line is None:
            (self._line,) = self.ax.plot(x, y)
        else:
            self._line.set_data(x, y)
        self.canvas.draw()


def main(args):
    app = QApplication(args)

    plot_widget = PlotWidget()
    plot_widget.resize(640, 480)
    plot_widget.show()

    info = QAudioDeviceInfo()
    input_device = info.defaultInputDevice()
    if input_device.isNull():
        print("There is no audio input device available.")
        exit(-1)

    audio_format = QAudioFormat()
    audio_format.setSampleRate(FS)
    audio_format.setSampleSize(8)
    audio_format.setChannelCount(1)
    audio_format.setCodec("audio/pcm")
    audio_format.setSampleType(QAudioFormat.UnSignedInt)
    if sys.byteorder == "little":
        audio_format.setByteOrder(QAudioFormat.LittleEndian)
    else:
        audio_format.setByteOrder(QAudioFormat.BigEndian)

    audio_input = QAudioInput(input_device, audio_format, None)
    audio_device = AudioDevice(interval=100)
    audio_device.data_changed.connect(plot_widget.update_data)
    audio_device.open(QIODevice.WriteOnly)
    audio_input.start(audio_device)

    app.exec_()


if __name__ == "__main__":
    main(sys.argv)
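If the per-sample Python loop in writeData ever becomes a bottleneck, the same buffering can be done in one shot with numpy.frombuffer. This is only a sketch of a possible drop-in replacement for AudioDevice.writeData; it assumes the same 8-bit unsigned mono format and reuses the np import, the deque, and SAMPLE_COUNT from the example above.

# Inside AudioDevice (sketch only):
def writeData(self, data):
    resolution = 4
    # Take every `resolution`-th byte, recenter 0..255 around zero, scale to -1..1
    raw = np.frombuffer(data, dtype=np.uint8)[::resolution]
    self.m_buffer.extend((raw.astype(np.float64) - 128.0) / 128.0)
    # The deque's maxlen keeps only the newest SAMPLE_COUNT samples
    return len(data)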
To show that the samples really reflect the signal from the microphone rather than random numbers, I added a widget that displays the waveform.
from PyQt5.QtMultimedia import QAudioDeviceInfo, QAudioFormat, QAudioInput
from PyQt5.QtWidgets import QMainWindow, QApplication, QWidget
from PyQt5.QtGui import QPainter, QPolygonF
from PyQt5.QtCore import QPointF
import sys
import numpy


class WaveWidget(QWidget):
    def __init__(self, parent=None):
        super().__init__(parent)
        self._values = None

    def setValues(self, values):
        self._values = values
        self.update()

    def paintEvent(self, event):
        if self._values is None:
            return
        painter = QPainter(self)
        # Scale the 8-bit samples (0..255) to the widget height
        ys = self._values / 255 * self.height()
        xs = numpy.linspace(0, self.width(), num=len(ys))
        points = QPolygonF([QPointF(x, y) for x, y in zip(xs, ys)])
        painter.drawPolyline(points)


class Window(QMainWindow):
    def __init__(self):
        super().__init__()
        info = QAudioDeviceInfo()
        input_device = info.defaultInputDevice()
        if input_device.isNull():
            # If no available device is found, display an error
            print("There is no audio input device available.")
            exit(-1)
        audio_format = QAudioFormat()
        audio_format.setSampleRate(44100)
        audio_format.setSampleSize(8)
        audio_format.setChannelCount(1)
        audio_format.setCodec("audio/pcm")
        audio_format.setSampleType(QAudioFormat.UnSignedInt)
        if sys.byteorder == "little":
            audio_format.setByteOrder(QAudioFormat.LittleEndian)
        else:
            audio_format.setByteOrder(QAudioFormat.BigEndian)
        self.audioInput = QAudioInput(input_device, audio_format, self)
        self.ioDevice = self.audioInput.start()
        self.ioDevice.readyRead.connect(self.read_audio)
        widget = WaveWidget()
        self._widget = widget
        self.setCentralWidget(widget)

    def read_audio(self):
        data = self.ioDevice.readAll()
        values = numpy.frombuffer(data.data(), dtype=numpy.uint8)
        self._widget.setValues(values)


if __name__ == "__main__":
    app = QApplication([])
    window = Window()
    window.show()
    app.exec()
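To close the loop with the original question, read_audio could also hand the same NumPy array straight to SciPy. The following is only a sketch of such an extension and is not part of the original answer; it assumes the Window class above and the 8-bit unsigned, 44100 Hz mono format.

from scipy.fft import rfft, rfftfreq

# Inside Window (sketch only):
def read_audio(self):
    data = self.ioDevice.readAll()
    values = numpy.frombuffer(data.data(), dtype=numpy.uint8)
    self._widget.setValues(values)
    if len(values) > 1:
        # Recenter the 8-bit unsigned samples and run a real-input FFT
        samples = (values.astype(numpy.float64) - 128.0) / 128.0
        spectrum = numpy.abs(rfft(samples))
        freqs = rfftfreq(len(samples), d=1.0 / 44100)
        # Skip the DC bin when looking for the strongest component
        peak = 1 + numpy.argmax(spectrum[1:])
        print("Dominant frequency: %.1f Hz" % freqs[peak])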