无法将大小为 486 的数组重塑为形状 (1,1)
cannot reshape array of size 486 into shape (1,1)
我创建了一个通过语音来预测情绪的模型!当我尝试提取语音特征时出现以下错误:
cannot reshape array of size 486 into shape (1,1)
我尝试了不同的重塑方式,但都没有效果!如果我把重塑改为 reshape(1, -1),又会遇到另一个错误:
ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 162, 1), found shape=(None, 486)
这是我的模型:
# Standardize the features with sklearn's StandardScaler: the statistics are
# fitted on the training split only and then reused, unchanged, for the test split.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# Bare expression: echoes the shapes in a notebook cell, has no effect in a script.
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# Add a trailing axis of size 1 so each sample becomes (n_features, 1), which is
# the per-sample input_shape declared on the first Conv1D layer below.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
# Bare expression again: shape echo only.
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# 1-D CNN: four Conv1D + MaxPooling1D stages, then Flatten and a small dense head.
model=Sequential()
# The input width is taken from the training data (x_train.shape[1]), so inputs
# passed to predict() later must have the same per-sample shape.
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
# Softmax over 8 output units, i.e. one probability per class.
model.add(Dense(units=8, activation='softmax'))
# NOTE(review): categorical_crossentropy presumably means y_train / y_test are
# one-hot encoded with 8 columns — confirm against the label preparation code.
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
# NOTE(review): patience=100 is larger than epochs=50 below, so this callback
# presumably never reduces the learning rate during this run — confirm intent.
rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=100, min_lr=0.0000001)
# The test split doubles as validation data during training.
history=model.fit(x_train, y_train, batch_size=23, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
这是提取特征的函数:
def extract_features(data, sample_rate=22050, **kwargs):
    """Build one flat feature vector describing an audio signal.

    Five librosa features are computed, each averaged over its frames, and
    concatenated in this order: zero-crossing rate, chroma STFT, MFCC,
    RMS energy, mel spectrogram.

    Args:
        data: Audio samples as accepted by the librosa feature functions.
        sample_rate: Sampling rate of ``data``. Defaults to 22050, the rate
            ``librosa.load`` resamples to when no ``sr`` is given. It used to
            be read from an undeclared global name, which failed (or silently
            used an unrelated value) when called from ``get_features``.
        **kwargs: Accepted for backward compatibility; not used.

    Returns:
        NumPy array with the per-feature frame means stacked horizontally.
        NOTE(review): with librosa's defaults this is presumably 162 values
        (1 + 12 + 20 + 1 + 128), the width the model expects — confirm.
    """

    def _frame_mean(feature):
        # Average over the frame axis, keeping one value per feature row.
        return np.mean(feature.T, axis=0)

    # ZCR
    zcr = _frame_mean(librosa.feature.zero_crossing_rate(y=data))
    # Chroma_stft, computed from the magnitude spectrogram
    stft = np.abs(librosa.stft(data))
    chroma_stft = _frame_mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate))
    # MFCC
    mfcc = _frame_mean(librosa.feature.mfcc(y=data, sr=sample_rate))
    # Root Mean Square Value
    rms = _frame_mean(librosa.feature.rms(y=data))
    # MelSpectrogram
    mel = _frame_mean(librosa.feature.melspectrogram(y=data, sr=sample_rate))

    # The empty float64 seed keeps the result dtype identical to the original
    # incremental hstack, which started from np.array([]).
    return np.hstack((np.array([]), zcr, chroma_stft, mfcc, rms, mel))


def get_features(path):
    """Load an audio file and return features for it and two augmented copies.

    Args:
        path: Path of the audio file to load with ``librosa.load``.

    Returns:
        NumPy array with three rows stacked vertically: features of the
        unmodified clip, of the clip with noise added, and of the clip after
        stretching followed by pitch shifting. Each row must be fed to the
        model as a separate sample.
    """
    # Load 2.5 s starting 0.6 s into the file; per the original note this skips
    # the silent start and end of each recording.
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

    # without augmentation
    plain = extract_features(data, sample_rate=sample_rate)

    # data with noise (`noise` is a helper defined elsewhere)
    noisy = extract_features(noise(data), sample_rate=sample_rate)

    # data with stretching and pitching (`stretch` / `pitch` defined elsewhere)
    stretched_pitched = pitch(stretch(data), sample_rate)
    augmented = extract_features(stretched_pitched, sample_rate=sample_rate)

    # One row per variant.
    return np.vstack((plain, noisy, augmented))
这里是我遇到错误的主要地方:
if __name__ == "__main__":
    # The trained `model` is expected to be in scope already (loaded or fitted
    # earlier); nothing in this block loads it.
    print("Please talk")
    filename = "test.wav"
    # Record audio into `filename` (record_to_file is defined elsewhere).
    record_to_file(filename)
    # get_features returns three stacked feature rows (plain + two augmented).
    features = get_features(filename)
    # NOTE: flattening those rows into a single row of 486 values is the step
    # the question is about; the model expects samples shaped (162, 1), so the
    # predict call below raises the ValueError quoted in the question.
    features = features.reshape(1, -1)
    # Predict and keep the output row of the first (only) sample.
    result = model.predict(features)[0]
    # Show the result.
    print("result:", result)
对这个错误有什么想法吗?
IIUC(如果我理解正确),你的错误来自 `features` 的形状,也许下面的方法对你有帮助。
例如,假设你的 `features` 如下所示:
# Random stand-in for the flattened array from get_features(...).reshape(1, -1).
features = np.random.rand(1, 486)
# features.shape -> (1, 486)
然后你需要将这个 `features` 拆分为三个部分:
# Cut the 486 columns into three equal, contiguous chunks; `features` becomes
# a list holding the three pieces.
features = np.array_split(features, 3, axis=1)
# Each piece has shape (1, 162), i.e. one feature vector of the width the
# model was trained on.
features_0, features_1, features_2 = features
然后用 `expand_dims` 增加一个维度,并按如下方式进行预测:
# Append a trailing axis: (1, 162) -> (1, 162, 1), matching the (None, 162, 1)
# input shape named in the error message.
features_0 = np.expand_dims(features_0, axis=2)
# NOTE(review): the training inputs were standardized with `scaler` before
# fitting; the same scaler.transform presumably has to be applied to the
# features before the expand_dims step above — confirm.
# [0] selects the prediction row of the single sample in the batch.
result = model.predict(features_0)[0]
我创建了一个通过语音来预测情绪的模型!当我尝试提取语音特征时出现以下错误:
cannot reshape array of size 486 into shape (1,1)
我尝试了不同的重塑方式,但都没有效果!如果我把重塑改为 reshape(1, -1),又会遇到另一个错误:
ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 162, 1), found shape=(None, 486)
这是我的模型:
# Standardize the features with sklearn's StandardScaler: the statistics are
# fitted on the training split only and then reused, unchanged, for the test split.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# Bare expression: echoes the shapes in a notebook cell, has no effect in a script.
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# Add a trailing axis of size 1 so each sample becomes (n_features, 1), which is
# the per-sample input_shape declared on the first Conv1D layer below.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
# Bare expression again: shape echo only.
x_train.shape, y_train.shape, x_test.shape, y_test.shape
# 1-D CNN: four Conv1D + MaxPooling1D stages, then Flatten and a small dense head.
model=Sequential()
# The input width is taken from the training data (x_train.shape[1]), so inputs
# passed to predict() later must have the same per-sample shape.
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
# Softmax over 8 output units, i.e. one probability per class.
model.add(Dense(units=8, activation='softmax'))
# NOTE(review): categorical_crossentropy presumably means y_train / y_test are
# one-hot encoded with 8 columns — confirm against the label preparation code.
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
# NOTE(review): patience=100 is larger than epochs=50 below, so this callback
# presumably never reduces the learning rate during this run — confirm intent.
rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=100, min_lr=0.0000001)
# The test split doubles as validation data during training.
history=model.fit(x_train, y_train, batch_size=23, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
这是提取特征的函数:
def extract_features(data, sample_rate=22050, **kwargs):
    """Build one flat feature vector describing an audio signal.

    Five librosa features are computed, each averaged over its frames, and
    concatenated in this order: zero-crossing rate, chroma STFT, MFCC,
    RMS energy, mel spectrogram.

    Args:
        data: Audio samples as accepted by the librosa feature functions.
        sample_rate: Sampling rate of ``data``. Defaults to 22050, the rate
            ``librosa.load`` resamples to when no ``sr`` is given. It used to
            be read from an undeclared global name, which failed (or silently
            used an unrelated value) when called from ``get_features``.
        **kwargs: Accepted for backward compatibility; not used.

    Returns:
        NumPy array with the per-feature frame means stacked horizontally.
        NOTE(review): with librosa's defaults this is presumably 162 values
        (1 + 12 + 20 + 1 + 128), the width the model expects — confirm.
    """

    def _frame_mean(feature):
        # Average over the frame axis, keeping one value per feature row.
        return np.mean(feature.T, axis=0)

    # ZCR
    zcr = _frame_mean(librosa.feature.zero_crossing_rate(y=data))
    # Chroma_stft, computed from the magnitude spectrogram
    stft = np.abs(librosa.stft(data))
    chroma_stft = _frame_mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate))
    # MFCC
    mfcc = _frame_mean(librosa.feature.mfcc(y=data, sr=sample_rate))
    # Root Mean Square Value
    rms = _frame_mean(librosa.feature.rms(y=data))
    # MelSpectrogram
    mel = _frame_mean(librosa.feature.melspectrogram(y=data, sr=sample_rate))

    # The empty float64 seed keeps the result dtype identical to the original
    # incremental hstack, which started from np.array([]).
    return np.hstack((np.array([]), zcr, chroma_stft, mfcc, rms, mel))


def get_features(path):
    """Load an audio file and return features for it and two augmented copies.

    Args:
        path: Path of the audio file to load with ``librosa.load``.

    Returns:
        NumPy array with three rows stacked vertically: features of the
        unmodified clip, of the clip with noise added, and of the clip after
        stretching followed by pitch shifting. Each row must be fed to the
        model as a separate sample.
    """
    # Load 2.5 s starting 0.6 s into the file; per the original note this skips
    # the silent start and end of each recording.
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

    # without augmentation
    plain = extract_features(data, sample_rate=sample_rate)

    # data with noise (`noise` is a helper defined elsewhere)
    noisy = extract_features(noise(data), sample_rate=sample_rate)

    # data with stretching and pitching (`stretch` / `pitch` defined elsewhere)
    stretched_pitched = pitch(stretch(data), sample_rate)
    augmented = extract_features(stretched_pitched, sample_rate=sample_rate)

    # One row per variant.
    return np.vstack((plain, noisy, augmented))
这里是我遇到错误的主要地方:
if __name__ == "__main__":
    # The trained `model` is expected to be in scope already (loaded or fitted
    # earlier); nothing in this block loads it.
    print("Please talk")
    filename = "test.wav"
    # Record audio into `filename` (record_to_file is defined elsewhere).
    record_to_file(filename)
    # get_features returns three stacked feature rows (plain + two augmented).
    features = get_features(filename)
    # NOTE: flattening those rows into a single row of 486 values is the step
    # the question is about; the model expects samples shaped (162, 1), so the
    # predict call below raises the ValueError quoted in the question.
    features = features.reshape(1, -1)
    # Predict and keep the output row of the first (only) sample.
    result = model.predict(features)[0]
    # Show the result.
    print("result:", result)
对这个错误有什么想法吗?
IIUC(如果我理解正确),你的错误来自 `features` 的形状,也许下面的方法对你有帮助。
例如,假设你的 `features` 如下所示:
# Random stand-in for the flattened array from get_features(...).reshape(1, -1).
features = np.random.rand(1, 486)
# features.shape -> (1, 486)
然后你需要将这个 `features` 拆分为三个部分:
# Cut the 486 columns into three equal, contiguous chunks; `features` becomes
# a list holding the three pieces.
features = np.array_split(features, 3, axis=1)
# Each piece has shape (1, 162), i.e. one feature vector of the width the
# model was trained on.
features_0, features_1, features_2 = features
然后用 `expand_dims` 增加一个维度,并按如下方式进行预测:
# Append a trailing axis: (1, 162) -> (1, 162, 1), matching the (None, 162, 1)
# input shape named in the error message.
features_0 = np.expand_dims(features_0, axis=2)
# NOTE(review): the training inputs were standardized with `scaler` before
# fitting; the same scaler.transform presumably has to be applied to the
# features before the expand_dims step above — confirm.
# [0] selects the prediction row of the single sample in the batch.
result = model.predict(features_0)[0]