Keras multi_gpu_model causes system to crash
I'm trying to train a fairly large LSTM on a large dataset and have 4 GPUs to spread the load. If I train on just one of them (any one of them, I've tried them all) it runs fine, but after adding the multi_gpu_model code, my entire system crashes when I try to run it.
Here is my multi-GPU code:
batch_size = 8
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(len(inputData[0]), len(inputData[0][0])) ))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(Dense(len(outputData[0][0]), activation='softmax'))
rms = RMSprop()
p_model = multi_gpu_model(model, gpus=4)
p_model.compile(loss='categorical_crossentropy',optimizer=rms, metrics=['categorical_accuracy'])
print("Fitting")
p_model.fit_generator(songBatchGenerator(songList,batch_size), epochs=250, verbose=1, shuffle=False, steps_per_epoch=math.ceil(len(songList)/batch_size))
pickleSave('kerasTrained.pickle', p_model)
print("Saved")
Changing it to:
batch_size = 8
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(len(inputData[0]), len(inputData[0][0])) ))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(Dense(len(outputData[0][0]), activation='softmax'))
rms = RMSprop()
model.compile(loss='categorical_crossentropy',optimizer=rms, metrics=['categorical_accuracy'])
print("Fitting")
model.fit_generator(songBatchGenerator(songList,batch_size), epochs=250, verbose=1, shuffle=False, steps_per_epoch=math.ceil(len(songList)/batch_size))
pickleSave('kerasTrained.pickle', model)
print("Saved")
works perfectly.
3 of the GPUs are Nvidia 1060 3GB cards, 1 is a 6GB card, and the system has about 4GB of RAM (though I doubt that's the issue, since I'm using a generator).
Keras does the computation on all 4 GPUs, while the model itself can be built on the CPU. You can try the code below. For more information, see the TensorFlow docs: https://www.tensorflow.org/api_docs/python/tf/keras/utils/multi_gpu_model
# Imports are not shown in the original snippet; these assume tf.keras
# (TF 1.x / early 2.x, where multi_gpu_model is available).
import math
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import multi_gpu_model

batch_size = 8

def create_model():
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(len(inputData[0]), len(inputData[0][0]))))
    model.add(LSTM(256, return_sequences=True))
    model.add(Dropout(.2))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(.2))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(.2))
    model.add(Dense(len(outputData[0][0]), activation='softmax'))
    return model

# we'll store a copy of the model on *every* GPU and then combine
# the results from the gradient updates on the CPU

# initialize the template model on the CPU
with tf.device("/cpu:0"):
    model = create_model()

# make the model parallel across the 4 GPUs
p_model = multi_gpu_model(model, gpus=4)

rms = RMSprop()
p_model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['categorical_accuracy'])

print("Fitting")
p_model.fit_generator(songBatchGenerator(songList, batch_size), epochs=250, verbose=1, shuffle=False, steps_per_epoch=math.ceil(len(songList)/batch_size))

pickleSave('kerasTrained.pickle', p_model)
print("Saved")