Training a Keras model on multiple feature files that are read in sequentially to save memory
I run into memory problems when trying to read a large number of feature files (see below). I figure I should split the training files and read them in sequentially. What is the best way to do that?
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import SGD

# path_features, pipeline, model_type, feature_type, category and
# plot_model are defined elsewhere in the pipeline
x_train = np.load(path_features + 'x_train.npy')
y_train = np.load(path_features + 'y_train.npy')
x_test = np.load(path_features + 'x_test.npy')
y_test = np.load(path_features + 'y_test.npy')

path_models = '../pipelines/' + pipeline + '/models/'

# global params
verbose_level = 1
inp_shape = x_train.shape[1:]

# models
if model_type == 'standard_4':
    print('Starting to train ' + feature_type + '_' + model_type + '.')
    num_classes = 1
    dropout_prob = 0.5
    activation_function = 'relu'
    loss_function = 'binary_crossentropy'
    batch_size = 32
    epoch_count = 100
    opt = SGD(lr=0.001)

    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=(3, 3), input_shape=inp_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=32, kernel_size=(3, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation=activation_function))
    model.add(Dropout(rate=dropout_prob))
    model.add(Dense(32, activation=activation_function))
    model.add(Dense(num_classes, activation='sigmoid'))
    model.summary()

    model.compile(loss=loss_function, optimizer=opt, metrics=['accuracy'])
    hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=epoch_count,
                     verbose=verbose_level,
                     validation_data=(x_test, y_test))
    model.save(path_models + category + '_' + feature_type + '_' + model_type + '.h5')
    print('Finished training ' + model_type + '.')
    plot_model(hist, path_models, category, feature_type, model_type)
    print('Saved model charts.')
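For illustration, the split itself could be done once up front along these lines (a sketch, assuming the full arrays can be loaded one last time to do the split; the chunk count and the numbered file naming are my own choices):

n_chunks = 10  # assumption: how many pieces to split the data into
for i, (x_chunk, y_chunk) in enumerate(zip(np.array_split(x_train, n_chunks),
                                           np.array_split(y_train, n_chunks))):
    # save each chunk as its own numbered .npy file
    np.save(path_features + 'x_train_' + str(i) + '.npy', x_chunk)
    np.save(path_features + 'y_train_' + str(i) + '.npy', y_chunk)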
You can use a python generator or a keras Sequence. The generator should keep yielding your batches indefinitely:
def myReader(trainOrTest):
    while True:  # Keras expects the generator to yield batches indefinitely
        # do something here to define path_features for the next file
        x = np.load(path_features + 'x_' + trainOrTest + '.npy')
        y = np.load(path_features + 'y_' + trainOrTest + '.npy')
        # if you're loading them already in a shape accepted by your model:
        yield (x, y)
Then you can train with fit_generator and predict values with predict_generator:

model.fit_generator(myReader(trainOrTest), steps_per_epoch=howManyFiles, epochs=...)
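For the prediction side, a call along these lines should work (howManyTestFiles is a placeholder for the number of test files; Keras's predict_generator accepts generators that yield (x, y) tuples for compatibility with training generators and simply ignores y):

predictions = model.predict_generator(myReader('test'), steps=howManyTestFiles)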
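If you prefer the keras Sequence option mentioned above, a minimal sketch could look like this (the class name and the numbered file naming x_train_0.npy, y_train_0.npy, ... are assumptions for illustration):

import numpy as np
from keras.utils import Sequence

class NpyFileSequence(Sequence):
    def __init__(self, path_features, trainOrTest, num_files):
        self.path_features = path_features
        self.trainOrTest = trainOrTest
        self.num_files = num_files

    def __len__(self):
        # one batch per file
        return self.num_files

    def __getitem__(self, idx):
        # load the idx-th chunk; assumes the numbered file naming above
        x = np.load(self.path_features + 'x_' + self.trainOrTest + '_' + str(idx) + '.npy')
        y = np.load(self.path_features + 'y_' + self.trainOrTest + '_' + str(idx) + '.npy')
        return x, y

model.fit_generator(NpyFileSequence(path_features, 'train', howManyFiles),
                    epochs=epoch_count)

A Sequence has the advantage over a plain generator that Keras knows its length, so steps_per_epoch can be omitted, and it can be used safely with multiprocessing.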