Why are my results still not reproducible?
I want to get reproducible results for a CNN. I use Keras and Google Colab with GPU support.
In addition to inserting the recommended code snippets (which are supposed to enable reproducibility), I also added seeds to the layers.
###### This is the first code snippet to run #####
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
###### This is the second code snippet to run #####
from __future__ import print_function
import numpy as np
import tensorflow as tf
print(tf.test.gpu_device_name())
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
###### This is the third code snippet to run #####
from keras import backend as K
tf.set_random_seed(1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
###### This is the fourth code snippet to run #####
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=1), input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25, seed=1))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5, seed=1))
    model.add(Dense(10, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model
def split_data(X,y):
    X_train_val, X_val, y_train_val, y_val = train_test_split(X, y, random_state=42, test_size=1/5, stratify=y)
    return (X_train_val, X_val, y_train_val, y_val)

def train_model_with_EarlyStopping(model, X, y):
    # make train and validation data
    X_tr, X_val, y_tr, y_val = split_data(X,y)
    es = EarlyStopping(monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
    history = model.fit(X_tr, y_tr,
                        batch_size=64,
                        epochs=200,
                        verbose=1,
                        validation_data=(X_val,y_val),
                        callbacks=[es])
    return history
###### This is the fifth code snippet to run #####
train_model_with_EarlyStopping(model_cnn(), X, y)
Every time I run the code above, I get different results. Is the cause in the code, or is it simply not possible to get reproducible results in Google Colab with GPU support?
Complete code (it contains some parts that aren't needed, such as libraries that are never used):
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
from __future__ import print_function  # NEW
import numpy as np
import tensorflow as tf
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
from keras import backend as K
tf.set_random_seed(1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
import os
local_root_path = os.path.expanduser("~/data/data")
print(local_root_path)
try:
    os.makedirs(local_root_path, exist_ok=True)
except: pass
def ListFolder(google_drive_id, destination):
    file_list = drive.ListFile({'q': "'%s' in parents and trashed=false" % google_drive_id}).GetList()
    counter = 0
    for f in file_list:
        # If it is a directory, create the directory and download the files inside it
        if f['mimeType']=='application/vnd.google-apps.folder':
            folder_path = os.path.join(destination, f['title'])
            os.makedirs(folder_path, exist_ok=True)
            print('creating directory {}'.format(folder_path))
            ListFolder(f['id'], folder_path)
        else:
            fname = os.path.join(destination, f['title'])
            f_ = drive.CreateFile({'id': f['id']})
            f_.GetContentFile(fname)
            counter += 1
    print('{} files were uploaded in {}'.format(counter, destination))
ListFolder("1DyM_D2ZJ5UHIXmXq4uHzKqXSkLTH-lSo", local_root_path)
import glob
import h5py
from time import time
from keras import initializers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, merge
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD, Adam, RMSprop, Adagrad, Adadelta, Adamax, Nadam
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.regularizers import l2
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras import backend as K
import numpy as np
import pickle as pkl
from matplotlib import pyplot as plt
%matplotlib inline
import gzip
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
from keras.datasets import fashion_mnist
from numpy import mean, std
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.optimizers import SGD, Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import auc, average_precision_score, f1_score
import time
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from google.colab import files
from PIL import Image
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=1), input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25, seed=1))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5, seed=1))
    model.add(Dense(10, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model

def train_model_with_EarlyStopping(model, X, y):
    X_tr, X_val, y_tr, y_val = split_train_val_data(X,y)
    es = EarlyStopping(monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
    history = model.fit(X_tr, y_tr,
                        batch_size=64,
                        epochs=200,
                        verbose=1,
                        validation_data=(X_val,y_val),
                        callbacks=[es])
    evaluate_model(model, history, X_tr, y_tr)
    return history
The problem is not limited to Colab; it is reproducible locally as well. The behavior, however, may be unavoidable.
The code at the bottom is a minimally reproducible version of your code, with fit parameters tweaked for faster testing. What I observed: across 5 runs at 468 iterations per run, the maximum difference in loss was only 0.0144%. That's pretty good. With batch_size=64, 60000 samples, and 20 epochs, you'll run 18750 iterations (60000/64 ≈ 937.5 per epoch, times 20), which will amplify this figure substantially.
Regardless, GPU parallelism is the most likely culprit driving the randomness: the tiny differences do accumulate over time to yield a substantial difference in results, as demonstrated below. If 1e-8 seems small, try adding random noise to half of your weights, with magnitude clipped at 1e-8, and watch the model's "life philosophy" change; a sketch of that experiment follows.
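A minimal sketch of that perturbation experiment (an illustration, not part of the original code; it assumes a compiled Keras model in a variable named model):

import numpy as np

# Perturb roughly half of each weight tensor with noise clipped at 1e-8.
for layer in model.layers:
    new_weights = []
    for w in layer.get_weights():
        noise = np.clip(np.random.randn(*w.shape) * 1e-8, -1e-8, 1e-8)
        mask = np.random.rand(*w.shape) < 0.5  # selects ~half the weights
        new_weights.append(w + noise * mask)
    layer.set_weights(new_weights)

Evaluate before and after this and the metrics drift, despite the perturbation magnitude being only 1e-8.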
The role of the seeds becomes dramatically pronounced if you don't use them: try it, and all of your metrics will fly rampant within the first 10 iterations. Also, loss is better suited for measuring run-to-run differences, since accuracy is far more sensitive to numeric precision errors: the difference between 60% and 70% accuracy on a 10-sample batch can be a single prediction that differs by 0.000001 with respect to 0.5, yet the loss barely changes.
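To make this concrete, a small demo with made-up numbers (assuming all-positive labels and cross-entropy loss): moving one prediction across the 0.5 threshold by 1e-6 shifts batch accuracy by 10 percentage points while the loss barely moves.

import numpy as np

y_true = np.ones(10)                                   # all-positive 10-sample batch
p1 = np.array([0.9]*6 + [0.4999995]*3 + [0.5000005])   # 7/10 classified correctly
p2 = p1.copy()
p2[-1] = 0.4999995                                     # one prediction moves by 1e-6

accuracy = lambda p: np.mean((p > 0.5) == y_true)
bce_loss = lambda p: -np.mean(np.log(p))               # cross-entropy with y_true == 1

print(accuracy(p1), accuracy(p2))                      # 0.7 vs. 0.6
print(bce_loss(p1), bce_loss(p2))                      # differ only around the 7th decimal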
Lastly, note that your choice of hyperparameters will have a far greater impact on model performance than randomness; no matter how many seeds you throw in, they won't turn the model into SOTA. -- I recommend this fine clip.
Your code is fine. You've taken all the practical steps to ensure reproducibility, with one exception: PYTHONHASHSEED must be set before your Python kernel starts.
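Setting it inside a running interpreter has no effect on hash randomization. One workaround for scripts (a sketch; in Colab the variable would instead need to be set before the kernel process launches) is to re-exec the interpreter with the variable in place:

import os
import sys

# If PYTHONHASHSEED wasn't fixed at startup, relaunch this script with it set.
if os.environ.get('PYTHONHASHSEED') != '0':
    os.environ['PYTHONHASHSEED'] = '0'
    os.execv(sys.executable, [sys.executable] + sys.argv)

print(hash('now stable across runs'))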
What can you do to reduce randomness?
Repeat runs, average results. Understandably that's expensive, but note that even a perfectly reproducible run isn't perfectly informative, as model variance with respect to the train and validation sets is likely to be much greater than noise-induced randomness.
K-fold cross-validation: can mitigate both data and noise variance significantly (see the sketch after this list).
Larger validation set: extracted features can differ only so much due to noise; the larger the validation set, the less small perturbations in the weights should show up in the metrics.
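A minimal sketch of the cross-validation idea, assuming model_cnn() and one-hot encoded (X, y) as defined above; the spread across folds also shows how much variance the data split itself contributes:

import numpy as np
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
val_losses = []
for tr_idx, val_idx in skf.split(X, y.argmax(axis=1)):   # class labels from one-hot y
    model = model_cnn()                                   # fresh model per fold
    hist = model.fit(X[tr_idx], y[tr_idx], batch_size=64, epochs=20,
                     verbose=0, validation_data=(X[val_idx], y[val_idx]))
    val_losses.append(min(hist.history['val_loss']))
print('val_loss: %.4f +/- %.4f' % (np.mean(val_losses), np.std(val_losses)))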
GPU parallelism: amplifying float error
print(2. * 11. / 9.) # 2.4444444444444446
print(2. / 9. * 11.) # 2.444444444444444
Order of operations matters, and by exploiting multithreading, GPU parallelism gives no guarantee that operations are executed in the same order. At first glance the difference may look innocuous, but give it enough iterations...
one = 1
for _ in range(int(1e8)):
    one *= (2. / 9. * 11.) / (2. * 11. / 9.)
print(one) # 0.9999999777955395
print(1 - one) # 1.8167285897874308e-08
... 而 "one" 是典型的小权重值 1e-08
远离其原始自我。如果 1 亿次迭代似乎是一个延伸,请考虑该操作在约半分钟内完成,而您的模型可以训练一个多小时,而前 运行 完全在 CPU.[=24= 上]
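If you need determinism more than speed, one option (a sketch using the same TF1-era API as the question) is to hide the GPU and cap the thread pools before building the session, trading runtime for run-to-run stability:

import os

# Hide the GPU so all ops run on the CPU, removing GPU-parallelism nondeterminism.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
from keras import backend as K

# Single-threaded execution fixes the order of floating-point reductions.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)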
Minimal reproducible experiment:
import tensorflow as tf
import random as rn
import numpy as np
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import MaxPooling2D, Conv2D
from keras.optimizers import Adam
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3),
                     kernel_initializer='he_uniform', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, kernel_initializer='he_uniform'))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001),
                  metrics=['accuracy'])
    return model
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
X_train = np.random.randn(30000, 28, 28, 1)
y_train = np.random.randint(0, 2, (30000, 10))
X_val = np.random.randn(30000, 28, 28, 1)
y_val = np.random.randint(0, 2, (30000, 10))
model = model_cnn()
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
history = model.fit(X_train, y_train, batch_size=64, shuffle=True,
                    epochs=1, verbose=1, validation_data=(X_val, y_val))
Run-to-run differences:
loss: 12.5044 - acc: 0.0971 - val_loss: 11.5389 - val_acc: 0.1051
loss: 12.5047 - acc: 0.0958 - val_loss: 11.5369 - val_acc: 0.1018
loss: 12.5055 - acc: 0.0955 - val_loss: 11.5382 - val_acc: 0.0980
loss: 12.5042 - acc: 0.0961 - val_loss: 11.5382 - val_acc: 0.1179
loss: 12.5062 - acc: 0.0960 - val_loss: 11.5366 - val_acc: 0.1082