音频数据集 .wav 文件的形状相同
same shape for audio dataset .wav files
所以,我正在创建一种 ANN 神经网络类型,它可以对说话的人是不是我进行分类,问题是我可以根据数据的形状来训练它。
X数据为
(262144,)
y数据为
(261768,)
如何使我的 .wav 音频文件数据具有相同的形状?
这是我的完整代码
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import numpy as np
from scipy.io import wavfile
from pathlib import Path
import os
### DATASET
pathlist = Path(os.path.abspath('Voiceclassification/Data/me/')).rglob('*.wav')
# My voice data
for path in pathlist:
filename = str(path)
# convert audio to numpy array and then 2D to 1D np Array
samplerate, data = wavfile.read(filename)
#print(f"sample rate: {samplerate}")
data = data.flatten()
#print(f"data: {data}")
pathlist2 = Path(os.path.abspath('Voiceclassification/Data/other/')).rglob('*.wav')
# other voice data
for path2 in pathlist2:
filename2 = str(path2)
samplerate2, data2 = wavfile.read(filename2)
data2 = data2.flatten()
#print(data2)
### ADAPTING THE DATA FOR THE MODEL
X = data # My voice
y = data2 # Other data
#print(X.shape)
#print(y.shape)
### Trainig the model
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
# Performing future scaling
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
### Creating the ANN
ann = tf.keras.models.Sequential()
# First hidden layer of the ann
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
# Second one
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
# Output layer
ann.add(tf.keras.layers.Dense(units=6, activation="sigmoid"))
# Compile our neural network
ann.compile(optimizer="adam",
loss="binary_crossentropy",
metrics=['accuracy'])
# Fit ANN
ann.fit(x_train, y_train, batch_size=32, epochs=100)
ann.save('train_model.model')
知道吗,每个 X 或 y 我总共有 18 个 .wav 文件
您可以使用 scipy.io 用于 wav 文件,重写文件只需 5 秒,我创建的这个小代码可以帮助您
def trim_wav( originalWavPath, newWavPath , start, new ):
sampleRate, waveData = wavfile.read( originalWavPath )
startSample = int( start * sampleRate )
endSample = int( new * sampleRate )
wavfile.write( newWavPath, sampleRate, waveData[startSample:endSample])
wp = "path of the wav file"
trim_wav(wp, wp.replace(".wav", ".wav"), 0,5)
这将裁剪您的音频文件并去除不会改变数据形状的毫秒数
所以,我正在创建一种 ANN 神经网络类型,它可以对说话的人是不是我进行分类,问题是我可以根据数据的形状来训练它。
X数据为
(262144,)
y数据为
(261768,)
如何使我的 .wav 音频文件数据具有相同的形状?
这是我的完整代码
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import numpy as np
from scipy.io import wavfile
from pathlib import Path
import os
### DATASET
pathlist = Path(os.path.abspath('Voiceclassification/Data/me/')).rglob('*.wav')
# My voice data
for path in pathlist:
filename = str(path)
# convert audio to numpy array and then 2D to 1D np Array
samplerate, data = wavfile.read(filename)
#print(f"sample rate: {samplerate}")
data = data.flatten()
#print(f"data: {data}")
pathlist2 = Path(os.path.abspath('Voiceclassification/Data/other/')).rglob('*.wav')
# other voice data
for path2 in pathlist2:
filename2 = str(path2)
samplerate2, data2 = wavfile.read(filename2)
data2 = data2.flatten()
#print(data2)
### ADAPTING THE DATA FOR THE MODEL
X = data # My voice
y = data2 # Other data
#print(X.shape)
#print(y.shape)
### Trainig the model
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
# Performing future scaling
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
### Creating the ANN
ann = tf.keras.models.Sequential()
# First hidden layer of the ann
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
# Second one
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
# Output layer
ann.add(tf.keras.layers.Dense(units=6, activation="sigmoid"))
# Compile our neural network
ann.compile(optimizer="adam",
loss="binary_crossentropy",
metrics=['accuracy'])
# Fit ANN
ann.fit(x_train, y_train, batch_size=32, epochs=100)
ann.save('train_model.model')
知道吗,每个 X 或 y 我总共有 18 个 .wav 文件
您可以使用 scipy.io 用于 wav 文件,重写文件只需 5 秒,我创建的这个小代码可以帮助您
def trim_wav( originalWavPath, newWavPath , start, new ):
sampleRate, waveData = wavfile.read( originalWavPath )
startSample = int( start * sampleRate )
endSample = int( new * sampleRate )
wavfile.write( newWavPath, sampleRate, waveData[startSample:endSample])
wp = "path of the wav file"
trim_wav(wp, wp.replace(".wav", ".wav"), 0,5)
这将裁剪您的音频文件并去除不会改变数据形状的毫秒数