keras:如何保存历史对象的训练历史属性
keras: how to save the training history attribute of the history object
在 Keras 中,我们可以把 model.fit 的返回值保存到 history 变量中,如下所示:
# Train the model and keep the object returned by fit(); the later snippets
# persist its ``history`` attribute.
# ``epochs`` replaces the ``nb_epoch`` keyword, which current Keras releases
# no longer accept.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    validation_data=(X_test, y_test),
)
现在,如何将 history 对象的 history 属性保存到文件中以供进一步使用(例如绘制 acc 或 loss 对纪元的图)?
history 对象有一个 history 字段,它是一个字典,保存了每个训练周期(epoch)的各项训练指标。例如,history.history['loss'][99] 返回模型在第 100 个训练周期的损失值。要保存它,可以用 pickle 序列化这个字典,或者直接把字典中的各个列表写入合适的文件。
我用的是下面的:
# Serialize the metrics dictionary (history.history) into a binary file.
with open('/trainHistoryDict', 'wb') as pickle_file:
    pickle.dump(history.history, pickle_file)
通过这种方式,我将历史保存为字典,以备日后绘制损失或准确性时使用。
model 的训练历史可以按如下方式保存到文件:
import json
# Train, then write the metrics dictionary to a JSON file.
hist = model.fit(X_train, y_train, epochs=5, batch_size=batch_size, validation_split=0.1)
# Metric values can be NumPy scalars, which json cannot serialize; convert
# every value to a built-in float before dumping.
serializable_history = {
    metric: [float(value) for value in values]
    for metric, values in hist.history.items()
}
with open('file.json', 'w') as f:
    json.dump(serializable_history, f)
我遇到了一个问题,即 keras 列表中的值不是 json 可序列化的。因此我写了这两个方便的函数供我使用。
import json,codecs
import numpy as np
def saveHist(path, history):
    """Write the metrics of a History-like object to a JSON file.

    Args:
        path: Destination path of the JSON file (written as UTF-8).
        history: Object exposing a ``history`` attribute that maps metric
            names to per-epoch values.

    NumPy arrays and NumPy scalars of any dtype (not only ``float64``) are
    converted to built-in Python types, because ``json`` cannot serialize
    them. Empty lists are written unchanged instead of raising IndexError.
    The former debug ``print`` of the converted dictionary is removed.
    """
    def to_builtin(value):
        # Recursively replace NumPy containers/scalars with plain Python ones.
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, (list, tuple)):
            return [to_builtin(item) for item in value]
        return value

    new_hist = {key: to_builtin(values) for key, values in history.history.items()}
    with codecs.open(path, 'w', encoding='utf-8') as file:
        json.dump(new_hist, file, separators=(',', ':'), sort_keys=True, indent=4)
def loadHist(path):
    """Read a history dictionary back from a UTF-8 encoded JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON content.
    """
    with codecs.open(path, mode='r', encoding='utf-8') as source:
        return json.load(source)
其中saveHist只需要获取json文件的保存路径,以及kerasfit
或fit_generator
方法返回的history对象
我敢肯定有很多方法可以做到这一点,但我四处摸索,想出了一个我自己的版本。
首先,自定义回调可以在每个纪元结束时抓取和更新历史记录。在那里我还有一个回调来保存模型。这两个都很方便,因为如果你崩溃或关机,你可以在最后完成的 epoch 上进行训练。
class LossHistory(Callback):
    """Callback that appends each epoch's logs to the JSON history file.

    Relies on the module-level ``history_filename`` and on the ``loadHist``,
    ``appendHist`` and ``saveHist`` helpers.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Merge the metrics of the finished epoch into the saved history.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Mapping of metric name to value for this epoch; may be None.
        """
        if not logs:
            # The default is None, which has no .items(); with nothing to
            # record the stored history is left untouched.
            return
        new_history = {}
        for k, v in logs.items():
            # NumPy scalars are not JSON serializable; .item() yields the
            # equivalent built-in value. Each value becomes a one-item list.
            new_history[k] = [v.item() if hasattr(v, 'item') else v]
        current_history = loadHist(history_filename)  # history saved so far
        current_history = appendHist(current_history, new_history)
        saveHist(history_filename, current_history)
# ``period=1`` is dropped: it is a deprecated argument that only restated the
# ModelCheckpoint default of saving after every epoch.
model_checkpoint = ModelCheckpoint(model_filename, verbose=0)
history_checkpoint = LossHistory()
callbacks_list = [model_checkpoint, history_checkpoint]
其次,这里有一些 'helper' 功能可以完全按照他们所说的去做。这些都是从 LossHistory()
回调中调用的。
#
import json, codecs
def saveHist(path, history):
    """Write a history dictionary to a UTF-8 encoded JSON file.

    Args:
        path: Destination path of the JSON file.
        history: JSON-serializable dictionary to store.
    """
    dump_options = {'separators': (',', ':'), 'sort_keys': True, 'indent': 4}
    with codecs.open(path, mode='w', encoding='utf-8') as sink:
        json.dump(history, sink, **dump_options)
def loadHist(path):
    """Load a saved history dictionary, or return an empty one.

    Args:
        path: Location of the UTF-8 encoded JSON history file.

    Returns:
        The decoded JSON content, or ``{}`` when no file exists at ``path``.
    """
    # Opening directly and catching the failure replaces the os.path.exists
    # check, so the function no longer needs ``os`` (which is not imported
    # next to json/codecs here).
    try:
        with codecs.open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}
def appendHist(h1, h2):
    """Concatenate two history dictionaries metric by metric.

    Args:
        h1: Earlier history, mapping metric name to a list of values.
        h2: Later history with the same structure.

    Returns:
        A new dictionary whose lists hold ``h1``'s values followed by
        ``h2``'s. A metric present in only one input keeps just that input's
        values (previously a key missing from ``h2`` raised KeyError and a
        key found only in ``h2`` was silently dropped), so such a list can be
        shorter than the others. The inputs are never modified or aliased.
    """
    dest = {key: list(values) for key, values in h1.items()}
    for key, values in h2.items():
        dest.setdefault(key, []).extend(values)
    return dest
之后,您只需将 history_filename
设置为 data/model-history.json
,并将 model_filename
设置为 data/model.h5
。假设您停止并开始并坚持回调,确保在训练结束时不会弄乱您的历史记录的最后一项调整是:
# Train with the checkpoint/history callbacks attached, then merge the
# metrics of this run into the in-memory history.
# ``epochs`` replaces the ``nb_epoch`` keyword, which current Keras releases
# no longer accept.
new_history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    validation_data=(X_test, y_test),
    callbacks=callbacks_list,
)
history = appendHist(history, new_history.history)
只要您需要,history = loadHist(history_filename)
就会恢复您的历史记录。
这里的别扭之处来自 json 和列表:不逐项转换的话,我没法让它正常工作。不管怎样,我知道这种方法可行,因为我已经折腾了好几天。上面使用 pickle.dump 的答案也许更好,但我并不了解它。如果我在这里遗漏了什么,或者你无法让它运行,请告诉我。
另一种方法:
由于 history.history
是一个 dict
,您也可以将其转换为 pandas
DataFrame
对象,然后可以将其保存以满足您的需要。
一步一步:
import pandas as pd
# assuming you stored your model.fit results in a 'history' variable:
history = model.fit(x_train, y_train, epochs=10)

# convert the history.history dict to a pandas DataFrame:
hist_df = pd.DataFrame(history.history)

# save to json:
hist_json_file = 'history.json'
with open(hist_json_file, mode='w') as f:
    hist_df.to_json(f)

# or save to csv:
hist_csv_file = 'history.csv'
# pandas asks for newline='' on file objects handed to to_csv; without it
# the output gets doubled line endings on Windows.
with open(hist_csv_file, mode='w', newline='') as f:
    hist_df.to_csv(f)
最简单的方法:
保存:
# Store the metrics dictionary in a .npy file.
np.save(file='my_history.npy', arr=history.history)
加载:
# allow_pickle expects a bool; the string 'TRUE' only worked because any
# non-empty string is truthy. .item() unwraps the stored dictionary.
# NOTE: unpickling can execute code, so only load files you trust.
history = np.load('my_history.npy', allow_pickle=True).item()
然后历史是一个字典,你可以使用键检索所有需要的值。
以上答案在训练过程结束时保存历史记录时很有用。如果您想在训练期间保存历史记录,CSVLogger 回调会有所帮助。
下面的代码保存模型权重,并把训练历史以表格形式写入 log.csv 文件。
# One callback checkpoints the model, the other logs the metrics of every
# epoch to ./log.csv while training runs.
model_cb = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path)
history_cb = tf.keras.callbacks.CSVLogger(
    './log.csv',
    separator=",",
    append=False,
)
history = model.fit(callbacks=[model_cb, history_cb])
您可以在 .txt 格式
中保存 tf.keras.callbacks.History
的历史记录属性
# Write each metric name followed by its values, one entry per line.
with open("./result_model.txt", 'w') as f:
    for metric_name, metric_values in history.history.items():
        print(metric_name, file=f)
        for entry in metric_values:
            print(entry, file=f)
这是一个把训练日志用 pickle 序列化到文件的回调。实例化回调对象时需提供模型文件路径;回调会据此创建一个关联文件——例如模型路径为“/home/user/model.h5”时,pickle 文件路径为“/home/user/model_history_pickle”。重新加载模型后,回调会从上次停止的 epoch 继续记录。
import os
import re
import pickle
#
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import backend as K
class PickleHistoryCallback(Callback):
    """Callback that pickles the accumulated training logs after every epoch.

    The history file sits next to the model file: the model path with its
    extension replaced by ``_history_pickle``. If that file already exists
    when the callback is created, its content is loaded and epoch numbering
    continues from the last recorded epoch.
    """

    def __init__(self, path_file_model, *args, **kwargs):
        """Create the callback.

        Args:
            path_file_model: Path of the model file; used only to derive the
                path of the history pickle.
            *args: Forwarded to ``Callback.__init__``.
            **kwargs: Forwarded to ``Callback.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.__path_file_model = path_file_model
        self.__path_file_history_pickle = None
        self.__history = {}
        self.__epoch = 0
        self.__setup()

    def __setup(self):
        """Derive the pickle path and reload an existing history, if any."""
        # splitext strips an extension from the last path component only.
        # The previous regex left an extension-less model path unchanged (so
        # the history file would have been the model file itself) and could
        # cut at a dot inside a directory name.
        root, _ = os.path.splitext(self.__path_file_model)
        self.__path_file_history_pickle = root + '_history_pickle'

        if os.path.isfile(self.__path_file_history_pickle):
            # NOTE: pickle.load can execute code stored in the file; only use
            # history files you created yourself.
            with open(self.__path_file_history_pickle, 'rb') as fd:
                self.__history = pickle.load(fd)
            # Start from last epoch; tolerate a history without epoch entries.
            epochs_seen = self.__history.get('e') or [0]
            self.__epoch = epochs_seen[-1]
        else:
            print("Pickled history file unavailable; the following pickled history file creation will occur after the first training epoch:\n\t{}".format(
                self.__path_file_history_pickle))

    def __update_history_file(self):
        """Overwrite the pickle file with the current history dictionary."""
        with open(self.__path_file_history_pickle, 'wb') as fd:
            pickle.dump(self.__history, fd)

    def on_epoch_end(self, epoch, logs=None):
        """Record the logs of the finished epoch and rewrite the pickle file.

        Args:
            epoch: Epoch index passed by the caller (unused; the callback
                keeps its own counter so numbering survives restarts).
            logs: Metrics of the finished epoch; may be None. The keys ``e``
                (running epoch number) and ``lr`` are added to this mapping.
        """
        self.__epoch += 1
        logs = logs or {}

        logs['e'] = self.__epoch
        optimizer = self.model.optimizer
        # Prefer ``learning_rate``; fall back to the legacy ``lr`` attribute
        # for optimizers that only expose that name.
        learning_rate = getattr(optimizer, 'learning_rate', None)
        if learning_rate is None:
            learning_rate = optimizer.lr
        logs['lr'] = K.get_value(learning_rate)

        for k, v in logs.items():
            self.__history.setdefault(k, []).append(v)

        self.__update_history_file()
在 Keras 中,我们可以把 model.fit 的返回值保存到 history 变量中,如下所示:
# Train the model and keep the object returned by fit(); the later snippets
# persist its ``history`` attribute.
# ``epochs`` replaces the ``nb_epoch`` keyword, which current Keras releases
# no longer accept.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    validation_data=(X_test, y_test),
)
现在,如何将 history 对象的 history 属性保存到文件中以供进一步使用(例如绘制 acc 或 loss 对纪元的图)?
history 对象有一个 history 字段,它是一个字典,保存了每个训练周期(epoch)的各项训练指标。例如,history.history['loss'][99] 返回模型在第 100 个训练周期的损失值。要保存它,可以用 pickle 序列化这个字典,或者直接把字典中的各个列表写入合适的文件。
我用的是下面的:
# Serialize the metrics dictionary (history.history) into a binary file.
with open('/trainHistoryDict', 'wb') as pickle_file:
    pickle.dump(history.history, pickle_file)
通过这种方式,我将历史保存为字典,以备日后绘制损失或准确性时使用。
model 的训练历史可以按如下方式保存到文件:
import json
# Train, then write the metrics dictionary to a JSON file.
hist = model.fit(X_train, y_train, epochs=5, batch_size=batch_size, validation_split=0.1)
# Metric values can be NumPy scalars, which json cannot serialize; convert
# every value to a built-in float before dumping.
serializable_history = {
    metric: [float(value) for value in values]
    for metric, values in hist.history.items()
}
with open('file.json', 'w') as f:
    json.dump(serializable_history, f)
我遇到了一个问题,即 keras 列表中的值不是 json 可序列化的。因此我写了这两个方便的函数供我使用。
import json,codecs
import numpy as np
def saveHist(path, history):
    """Write the metrics of a History-like object to a JSON file.

    Args:
        path: Destination path of the JSON file (written as UTF-8).
        history: Object exposing a ``history`` attribute that maps metric
            names to per-epoch values.

    NumPy arrays and NumPy scalars of any dtype (not only ``float64``) are
    converted to built-in Python types, because ``json`` cannot serialize
    them. Empty lists are written unchanged instead of raising IndexError.
    The former debug ``print`` of the converted dictionary is removed.
    """
    def to_builtin(value):
        # Recursively replace NumPy containers/scalars with plain Python ones.
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, (list, tuple)):
            return [to_builtin(item) for item in value]
        return value

    new_hist = {key: to_builtin(values) for key, values in history.history.items()}
    with codecs.open(path, 'w', encoding='utf-8') as file:
        json.dump(new_hist, file, separators=(',', ':'), sort_keys=True, indent=4)
def loadHist(path):
    """Read a history dictionary back from a UTF-8 encoded JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON content.
    """
    with codecs.open(path, mode='r', encoding='utf-8') as source:
        return json.load(source)
其中saveHist只需要获取json文件的保存路径,以及kerasfit
或fit_generator
方法返回的history对象
我敢肯定有很多方法可以做到这一点,但我四处摸索,想出了一个我自己的版本。
首先,自定义回调可以在每个纪元结束时抓取和更新历史记录。在那里我还有一个回调来保存模型。这两个都很方便,因为如果你崩溃或关机,你可以在最后完成的 epoch 上进行训练。
class LossHistory(Callback):
    """Callback that appends each epoch's logs to the JSON history file.

    Relies on the module-level ``history_filename`` and on the ``loadHist``,
    ``appendHist`` and ``saveHist`` helpers.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Merge the metrics of the finished epoch into the saved history.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Mapping of metric name to value for this epoch; may be None.
        """
        if not logs:
            # The default is None, which has no .items(); with nothing to
            # record the stored history is left untouched.
            return
        new_history = {}
        for k, v in logs.items():
            # NumPy scalars are not JSON serializable; .item() yields the
            # equivalent built-in value. Each value becomes a one-item list.
            new_history[k] = [v.item() if hasattr(v, 'item') else v]
        current_history = loadHist(history_filename)  # history saved so far
        current_history = appendHist(current_history, new_history)
        saveHist(history_filename, current_history)
# ``period=1`` is dropped: it is a deprecated argument that only restated the
# ModelCheckpoint default of saving after every epoch.
model_checkpoint = ModelCheckpoint(model_filename, verbose=0)
history_checkpoint = LossHistory()
callbacks_list = [model_checkpoint, history_checkpoint]
其次,这里有一些 'helper' 功能可以完全按照他们所说的去做。这些都是从 LossHistory()
回调中调用的。
#
import json, codecs
def saveHist(path, history):
    """Write a history dictionary to a UTF-8 encoded JSON file.

    Args:
        path: Destination path of the JSON file.
        history: JSON-serializable dictionary to store.
    """
    dump_options = {'separators': (',', ':'), 'sort_keys': True, 'indent': 4}
    with codecs.open(path, mode='w', encoding='utf-8') as sink:
        json.dump(history, sink, **dump_options)
def loadHist(path):
    """Load a saved history dictionary, or return an empty one.

    Args:
        path: Location of the UTF-8 encoded JSON history file.

    Returns:
        The decoded JSON content, or ``{}`` when no file exists at ``path``.
    """
    # Opening directly and catching the failure replaces the os.path.exists
    # check, so the function no longer needs ``os`` (which is not imported
    # next to json/codecs here).
    try:
        with codecs.open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}
def appendHist(h1, h2):
    """Concatenate two history dictionaries metric by metric.

    Args:
        h1: Earlier history, mapping metric name to a list of values.
        h2: Later history with the same structure.

    Returns:
        A new dictionary whose lists hold ``h1``'s values followed by
        ``h2``'s. A metric present in only one input keeps just that input's
        values (previously a key missing from ``h2`` raised KeyError and a
        key found only in ``h2`` was silently dropped), so such a list can be
        shorter than the others. The inputs are never modified or aliased.
    """
    dest = {key: list(values) for key, values in h1.items()}
    for key, values in h2.items():
        dest.setdefault(key, []).extend(values)
    return dest
之后,您只需将 history_filename
设置为 data/model-history.json
,并将 model_filename
设置为 data/model.h5
。假设您停止并开始并坚持回调,确保在训练结束时不会弄乱您的历史记录的最后一项调整是:
# Train with the checkpoint/history callbacks attached, then merge the
# metrics of this run into the in-memory history.
# ``epochs`` replaces the ``nb_epoch`` keyword, which current Keras releases
# no longer accept.
new_history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    validation_data=(X_test, y_test),
    callbacks=callbacks_list,
)
history = appendHist(history, new_history.history)
只要您需要,history = loadHist(history_filename)
就会恢复您的历史记录。
这里的别扭之处来自 json 和列表:不逐项转换的话,我没法让它正常工作。不管怎样,我知道这种方法可行,因为我已经折腾了好几天。上面使用 pickle.dump 的答案也许更好,但我并不了解它。如果我在这里遗漏了什么,或者你无法让它运行,请告诉我。
另一种方法:
由于 history.history
是一个 dict
,您也可以将其转换为 pandas
DataFrame
对象,然后可以将其保存以满足您的需要。
一步一步:
import pandas as pd
# assuming you stored your model.fit results in a 'history' variable:
history = model.fit(x_train, y_train, epochs=10)

# convert the history.history dict to a pandas DataFrame:
hist_df = pd.DataFrame(history.history)

# save to json:
hist_json_file = 'history.json'
with open(hist_json_file, mode='w') as f:
    hist_df.to_json(f)

# or save to csv:
hist_csv_file = 'history.csv'
# pandas asks for newline='' on file objects handed to to_csv; without it
# the output gets doubled line endings on Windows.
with open(hist_csv_file, mode='w', newline='') as f:
    hist_df.to_csv(f)
最简单的方法:
保存:
# Store the metrics dictionary in a .npy file.
np.save(file='my_history.npy', arr=history.history)
加载:
# allow_pickle expects a bool; the string 'TRUE' only worked because any
# non-empty string is truthy. .item() unwraps the stored dictionary.
# NOTE: unpickling can execute code, so only load files you trust.
history = np.load('my_history.npy', allow_pickle=True).item()
然后历史是一个字典,你可以使用键检索所有需要的值。
以上答案在训练过程结束时保存历史记录时很有用。如果您想在训练期间保存历史记录,CSVLogger 回调会有所帮助。
下面的代码保存模型权重,并把训练历史以表格形式写入 log.csv 文件。
# One callback checkpoints the model, the other logs the metrics of every
# epoch to ./log.csv while training runs.
model_cb = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path)
history_cb = tf.keras.callbacks.CSVLogger(
    './log.csv',
    separator=",",
    append=False,
)
history = model.fit(callbacks=[model_cb, history_cb])
您可以在 .txt 格式
中保存tf.keras.callbacks.History
的历史记录属性
# Write each metric name followed by its values, one entry per line.
with open("./result_model.txt", 'w') as f:
    for metric_name, metric_values in history.history.items():
        print(metric_name, file=f)
        for entry in metric_values:
            print(entry, file=f)
这是一个把训练日志用 pickle 序列化到文件的回调。实例化回调对象时需提供模型文件路径;回调会据此创建一个关联文件——例如模型路径为“/home/user/model.h5”时,pickle 文件路径为“/home/user/model_history_pickle”。重新加载模型后,回调会从上次停止的 epoch 继续记录。
import os
import re
import pickle
#
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import backend as K
class PickleHistoryCallback(Callback):
    """Callback that pickles the accumulated training logs after every epoch.

    The history file sits next to the model file: the model path with its
    extension replaced by ``_history_pickle``. If that file already exists
    when the callback is created, its content is loaded and epoch numbering
    continues from the last recorded epoch.
    """

    def __init__(self, path_file_model, *args, **kwargs):
        """Create the callback.

        Args:
            path_file_model: Path of the model file; used only to derive the
                path of the history pickle.
            *args: Forwarded to ``Callback.__init__``.
            **kwargs: Forwarded to ``Callback.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.__path_file_model = path_file_model
        self.__path_file_history_pickle = None
        self.__history = {}
        self.__epoch = 0
        self.__setup()

    def __setup(self):
        """Derive the pickle path and reload an existing history, if any."""
        # splitext strips an extension from the last path component only.
        # The previous regex left an extension-less model path unchanged (so
        # the history file would have been the model file itself) and could
        # cut at a dot inside a directory name.
        root, _ = os.path.splitext(self.__path_file_model)
        self.__path_file_history_pickle = root + '_history_pickle'

        if os.path.isfile(self.__path_file_history_pickle):
            # NOTE: pickle.load can execute code stored in the file; only use
            # history files you created yourself.
            with open(self.__path_file_history_pickle, 'rb') as fd:
                self.__history = pickle.load(fd)
            # Start from last epoch; tolerate a history without epoch entries.
            epochs_seen = self.__history.get('e') or [0]
            self.__epoch = epochs_seen[-1]
        else:
            print("Pickled history file unavailable; the following pickled history file creation will occur after the first training epoch:\n\t{}".format(
                self.__path_file_history_pickle))

    def __update_history_file(self):
        """Overwrite the pickle file with the current history dictionary."""
        with open(self.__path_file_history_pickle, 'wb') as fd:
            pickle.dump(self.__history, fd)

    def on_epoch_end(self, epoch, logs=None):
        """Record the logs of the finished epoch and rewrite the pickle file.

        Args:
            epoch: Epoch index passed by the caller (unused; the callback
                keeps its own counter so numbering survives restarts).
            logs: Metrics of the finished epoch; may be None. The keys ``e``
                (running epoch number) and ``lr`` are added to this mapping.
        """
        self.__epoch += 1
        logs = logs or {}

        logs['e'] = self.__epoch
        optimizer = self.model.optimizer
        # Prefer ``learning_rate``; fall back to the legacy ``lr`` attribute
        # for optimizers that only expose that name.
        learning_rate = getattr(optimizer, 'learning_rate', None)
        if learning_rate is None:
            learning_rate = optimizer.lr
        logs['lr'] = K.get_value(learning_rate)

        for k, v in logs.items():
            self.__history.setdefault(k, []).append(v)

        self.__update_history_file()