(已修改)TensorFlow tflearn 二值图像训练问题
(Modified) TensorFlow tflearn: question about training on binarized images
我想用 TensorFlow(tflearn)训练经过 PIL 二值化处理的指纹图像。
我正在尝试用二值化后的图像进行训练,但输入数据的形状不正确。
from __future__ import division, print_function, absolute_import
import pickle
import numpy as np
from PIL import Image
import tflearn
import tensorflow as tf
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
def load_image(img_path):
    """Open the image file at ``img_path`` with PIL and return the image."""
    return Image.open(img_path)
def resize_image(in_image, new_width, new_height, out_image=None,
                 resize_mode=None):
    """Resize an image to ``new_width`` x ``new_height`` pixels.

    Args:
        in_image: image object exposing ``resize`` (a PIL image).
        new_width: target width.
        new_height: target height.
        out_image: optional path; when truthy, the resized image is also
            saved there.
        resize_mode: resampling filter handed to ``resize``. ``None`` (the
            default) selects the Lanczos filter, i.e. the filter the old
            ``Image.ANTIALIAS`` name referred to.

    Returns:
        The resized image.
    """
    if resize_mode is None:
        # Image.ANTIALIAS was removed in Pillow 10, so using it as a default
        # argument breaks the whole module at definition time. Resolve the
        # filter lazily and fall back to the old name on very old Pillow.
        resize_mode = getattr(Image, 'LANCZOS', None)
        if resize_mode is None:
            resize_mode = Image.ANTIALIAS
    img = in_image.resize((new_width, new_height), resize_mode)
    if out_image:
        img.save(out_image)
    return img
def pil_to_nparray(pil_image):
    """Return the pixel data of ``pil_image`` as a float32 NumPy array.

    ``load()`` is called on the image before it is handed to ``np.asarray``.
    The array keeps whatever shape the image converts to; no channel axis is
    added here.
    """
    pil_image.load()
    return np.asarray(pil_image, dtype="float32")
def binarization(in_img, threshold):
    """Binarize an image against ``threshold``.

    The image is converted to 8-bit grayscale ('L'); pixels strictly greater
    than ``threshold`` become 255 and all others become 0.

    Returns:
        The binarized image converted to mode 'F'.
    """
    gray = in_img.convert('L')
    # point() applies the mapping to every pixel in one call, replacing the
    # slow per-pixel getpixel()/putpixel() double loop.
    binary = gray.point(lambda value: 255 if value > threshold else 0)
    return binary.convert('F')
def load_data(datafile, num_clss, save=True, save_path='dataset.pkl'):
    """Load, binarize and resize every image listed in ``datafile``.

    Each non-empty line of ``datafile`` holds an image path followed by an
    integer class index, separated by whitespace.

    Args:
        datafile: path of the text file listing the images.
        num_clss: number of classes; length of each one-hot label vector.
        save: when true, pickle ``(images, labels)`` to ``save_path``.
        save_path: destination of the pickled dataset.

    Returns:
        ``(images, labels)``: a list of image arrays and a list of one-hot
        label vectors, in file order.
    """
    labels = []
    images = []
    # The context manager closes the list file even if an image fails to load.
    with open(datafile, 'r') as train_list:
        for line in train_list:
            tmp = line.split()
            if not tmp:
                # Tolerate blank lines such as a trailing newline.
                continue
            fpath = tmp[0]
            print(fpath)
            img = load_image(fpath)
            img = binarization(img, 128)
            # NOTE(review): resizing after thresholding interpolates pixel
            # values, so the result may no longer be strictly 0/255 --
            # confirm this order is intended.
            img = resize_image(img, 224, 224)
            images.append(pil_to_nparray(img))
            # One-hot encode the class index.
            label = np.zeros(num_clss)
            label[int(tmp[1])] = 1
            labels.append(label)
    if save:
        with open(save_path, 'wb') as out_file:
            pickle.dump((images, labels), out_file)
    return images, labels
def load_from_pkl(dataset_file):
    """Load and return the ``(X, Y)`` pair pickled in ``dataset_file``."""
    # NOTE: unpickling can execute arbitrary code; only load dataset files
    # that you produced yourself.
    with open(dataset_file, 'rb') as pkl_file:
        X, Y = pickle.load(pkl_file)
    return X, Y
def create_vggnet(num_classes):
    """Build the VGG-style network together with its training layer.

    Five groups of 3x3 ReLU convolutions (2, 2, 3, 3 and 3 layers with 64,
    128, 256, 512 and 512 filters), each followed by 2x2 max pooling, feed
    two 4096-unit fully connected layers with dropout and a softmax layer of
    ``num_classes`` outputs.

    Returns:
        The network ending in the ``regression`` layer (adam optimizer,
        categorical cross-entropy loss).
    """
    # Building 'VGGNet'
    # The input layer expects batches of 224x224 samples with 3 channels.
    network = input_data(shape=[None, 224, 224, 3], name='input')
    # (number of filters, number of stacked conv layers) per group.
    conv_groups = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    for n_filters, n_convs in conv_groups:
        for _ in range(n_convs):
            network = conv_2d(network, n_filters, filter_size=3, strides=1,
                              activation='relu')
        network = max_pool_2d(network, kernel_size=2, strides=2)
    for _ in range(2):
        network = fully_connected(network, 4096, activation='relu')
        network = dropout(network, 0.5)
    network = fully_connected(network, num_classes, activation='softmax')
    network = regression(network, optimizer='adam',
                         loss='categorical_crossentropy',
                         learning_rate=0.001)
    return network
def train(network, X, Y):
    """Fit a tflearn DNN built from ``network`` on ``(X, Y)`` and save it.

    Checkpoints go to 'model_vgg', TensorBoard logs to 'output', and the
    final model is written to 'model_save.model'.
    """
    # Training: X is fed to the input layer and Y to the targets.
    model = tflearn.DNN(network, checkpoint_path='model_vgg',
                        max_checkpoints=1, tensorboard_verbose=2,
                        tensorboard_dir='output')
    model.fit(X, Y, n_epoch=100, validation_set=0.1, shuffle=True,
              show_metric=True, batch_size=64, snapshot_step=200,
              snapshot_epoch=False, run_id='vgg_fingerprint')
    model.save('model_save.model')
def predict(network, modelfile, images):
    """Load ``modelfile`` into a DNN wrapping ``network`` and predict.

    Returns:
        Whatever ``model.predict`` returns for ``images``.
    """
    model = tflearn.DNN(network)
    model.load(modelfile)
    return model.predict(images)
if __name__ == '__main__':
    # dataset.pkl must already exist; to (re)build it from the image list,
    # run this line once first:
    # image, label = load_data('train.txt', 5)
    X, Y = load_from_pkl('dataset.pkl')
    net = create_vggnet(5)
    train(net, X, Y)
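我试过用 numpy 的 reshape 来改变数组的形状。
但是,下面的错误仍然反复出现。
报错如下:
ValueError: Cannot feed value of shape (64, 224, 224) for Tensor u'input/X:0', which has shape '(?, 224, 224, 3)'(即:无法把形状为 (64, 224, 224) 的值提供给形状为 (?, 224, 224, 3) 的张量 input/X:0)
问题出在哪里?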
问题在于你的输入数据的形状——它与输入层不匹配。
输入层定义在 create_vggnet() 中:
def create_vggnet(num_classes):
    # Building 'VGGNet'
    # The input layer expects batches of 224x224 samples with 3 channels.
    network = input_data(shape=[None, 224, 224, 3], name='input')
    # ... (excerpt: the remaining layers are not shown here)
所以网络期望的输入是 None(即任意数量)个形状为 (224, 224, 3) 的样本,也就是 224x224 的 RGB(3 通道)图像;而你传入的是 64(你的批量大小)个 224x224 的数组。
有两种修复方法:
1)(可能更浪费)——将图像扩展为 RGB。
也就是说,在把图片转成 'L'(亮度,即灰度)并完成二值化之后,先把它转成 RGB,然后你可以再把它转换成 'F'
(参见:http://effbot.org/imagingbook/image.htm 以及 Stack Overflow 问题 “How do I save a mode 'F' image? (Python/PIL)”)
def binarization(in_img, threshold):
    """Binarize ``in_img`` and return it as a 3-channel RGB image.

    The image is converted to 8-bit grayscale ('L'); pixels strictly greater
    than ``threshold`` become 255 and all others become 0. The result is then
    expanded to 'RGB' so that converting it to an array yields 3 channels.
    """
    gray = in_img.convert('L')
    # point() applies the mapping to every pixel in one call, replacing the
    # slow per-pixel getpixel()/putpixel() double loop.
    binary = gray.point(lambda value: 255 if value > threshold else 0)
    # Stay in 'RGB': mode 'F' is single-band, so a further convert('F') would
    # collapse the image back to one channel and the array would again be
    # (H, W). The float conversion is done later by
    # np.asarray(..., dtype="float32"), which gives (H, W, 3).
    return binary.convert('RGB')
2)(更节省,但你会稍微改动网络(仅输入层)——所以可以说它 "isn't VGG 16 anymore",即不再是严格意义上的 VGG 16)你可以把输入层改为单通道。
def create_vggnet(num_classes):
    # Building 'VGGNet'
    # Single-channel input: each sample must have shape (224, 224, 1).
    network = input_data(shape=[None, 224, 224, 1], name='input')
    # ... (excerpt: the remaining layers are not shown here)
不幸的是,shape=[None, 224, 224] 不起作用(会出现与 "The Tensor needs to be 4D" 相关的错误)。因此,单个输入样本的形状必须是 (224, 224, 1)。
所以你需要给图像数组增加一个额外的维度:
def pil_to_nparray(pil_image):
    """Return the image as a float32 array with an extra axis at position 2.

    A 2-D (height, width) image therefore becomes (height, width, 1),
    whatever its size.
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype="float32")
    return np.expand_dims(pixels, axis=2)
或(甚至更好):
def pil_to_nparray(pil_image):
    """Return the image as a float32 array reshaped to (224, 224, 1).

    The target shape is hard-coded, so this only works when the image holds
    exactly 224 * 224 values; any other size makes ``reshape`` raise.
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype="float32")
    return pixels.reshape((224, 224, 1))
(后一个版本看起来更直接,一眼就能看出它的作用)
但它仅在输入图像为 224x224 时有效,而 expand_dims 对任何尺寸的图像都能添加这个额外的维度。
我想用 TensorFlow(tflearn)训练经过 PIL 二值化处理的指纹图像。 我正在尝试用二值化后的图像进行训练,但输入数据的形状不正确。
from __future__ import division, print_function, absolute_import
import pickle
import numpy as np
from PIL import Image
import tflearn
import tensorflow as tf
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
def load_image(img_path):
    """Open the image file at ``img_path`` with PIL and return the image."""
    return Image.open(img_path)
def resize_image(in_image, new_width, new_height, out_image=None,
                 resize_mode=None):
    """Resize an image to ``new_width`` x ``new_height`` pixels.

    Args:
        in_image: image object exposing ``resize`` (a PIL image).
        new_width: target width.
        new_height: target height.
        out_image: optional path; when truthy, the resized image is also
            saved there.
        resize_mode: resampling filter handed to ``resize``. ``None`` (the
            default) selects the Lanczos filter, i.e. the filter the old
            ``Image.ANTIALIAS`` name referred to.

    Returns:
        The resized image.
    """
    if resize_mode is None:
        # Image.ANTIALIAS was removed in Pillow 10, so using it as a default
        # argument breaks the whole module at definition time. Resolve the
        # filter lazily and fall back to the old name on very old Pillow.
        resize_mode = getattr(Image, 'LANCZOS', None)
        if resize_mode is None:
            resize_mode = Image.ANTIALIAS
    img = in_image.resize((new_width, new_height), resize_mode)
    if out_image:
        img.save(out_image)
    return img
def pil_to_nparray(pil_image):
    """Return the pixel data of ``pil_image`` as a float32 NumPy array.

    ``load()`` is called on the image before it is handed to ``np.asarray``.
    The array keeps whatever shape the image converts to; no channel axis is
    added here.
    """
    pil_image.load()
    return np.asarray(pil_image, dtype="float32")
def binarization(in_img, threshold):
    """Binarize an image against ``threshold``.

    The image is converted to 8-bit grayscale ('L'); pixels strictly greater
    than ``threshold`` become 255 and all others become 0.

    Returns:
        The binarized image converted to mode 'F'.
    """
    gray = in_img.convert('L')
    # point() applies the mapping to every pixel in one call, replacing the
    # slow per-pixel getpixel()/putpixel() double loop.
    binary = gray.point(lambda value: 255 if value > threshold else 0)
    return binary.convert('F')
def load_data(datafile, num_clss, save=True, save_path='dataset.pkl'):
    """Load, binarize and resize every image listed in ``datafile``.

    Each non-empty line of ``datafile`` holds an image path followed by an
    integer class index, separated by whitespace.

    Args:
        datafile: path of the text file listing the images.
        num_clss: number of classes; length of each one-hot label vector.
        save: when true, pickle ``(images, labels)`` to ``save_path``.
        save_path: destination of the pickled dataset.

    Returns:
        ``(images, labels)``: a list of image arrays and a list of one-hot
        label vectors, in file order.
    """
    labels = []
    images = []
    # The context manager closes the list file even if an image fails to load.
    with open(datafile, 'r') as train_list:
        for line in train_list:
            tmp = line.split()
            if not tmp:
                # Tolerate blank lines such as a trailing newline.
                continue
            fpath = tmp[0]
            print(fpath)
            img = load_image(fpath)
            img = binarization(img, 128)
            # NOTE(review): resizing after thresholding interpolates pixel
            # values, so the result may no longer be strictly 0/255 --
            # confirm this order is intended.
            img = resize_image(img, 224, 224)
            images.append(pil_to_nparray(img))
            # One-hot encode the class index.
            label = np.zeros(num_clss)
            label[int(tmp[1])] = 1
            labels.append(label)
    if save:
        with open(save_path, 'wb') as out_file:
            pickle.dump((images, labels), out_file)
    return images, labels
def load_from_pkl(dataset_file):
    """Load and return the ``(X, Y)`` pair pickled in ``dataset_file``."""
    # NOTE: unpickling can execute arbitrary code; only load dataset files
    # that you produced yourself.
    with open(dataset_file, 'rb') as pkl_file:
        X, Y = pickle.load(pkl_file)
    return X, Y
def create_vggnet(num_classes):
    """Build the VGG-style network together with its training layer.

    Five groups of 3x3 ReLU convolutions (2, 2, 3, 3 and 3 layers with 64,
    128, 256, 512 and 512 filters), each followed by 2x2 max pooling, feed
    two 4096-unit fully connected layers with dropout and a softmax layer of
    ``num_classes`` outputs.

    Returns:
        The network ending in the ``regression`` layer (adam optimizer,
        categorical cross-entropy loss).
    """
    # Building 'VGGNet'
    # The input layer expects batches of 224x224 samples with 3 channels.
    network = input_data(shape=[None, 224, 224, 3], name='input')
    # (number of filters, number of stacked conv layers) per group.
    conv_groups = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    for n_filters, n_convs in conv_groups:
        for _ in range(n_convs):
            network = conv_2d(network, n_filters, filter_size=3, strides=1,
                              activation='relu')
        network = max_pool_2d(network, kernel_size=2, strides=2)
    for _ in range(2):
        network = fully_connected(network, 4096, activation='relu')
        network = dropout(network, 0.5)
    network = fully_connected(network, num_classes, activation='softmax')
    network = regression(network, optimizer='adam',
                         loss='categorical_crossentropy',
                         learning_rate=0.001)
    return network
def train(network, X, Y):
    """Fit a tflearn DNN built from ``network`` on ``(X, Y)`` and save it.

    Checkpoints go to 'model_vgg', TensorBoard logs to 'output', and the
    final model is written to 'model_save.model'.
    """
    # Training: X is fed to the input layer and Y to the targets.
    model = tflearn.DNN(network, checkpoint_path='model_vgg',
                        max_checkpoints=1, tensorboard_verbose=2,
                        tensorboard_dir='output')
    model.fit(X, Y, n_epoch=100, validation_set=0.1, shuffle=True,
              show_metric=True, batch_size=64, snapshot_step=200,
              snapshot_epoch=False, run_id='vgg_fingerprint')
    model.save('model_save.model')
def predict(network, modelfile, images):
    """Load ``modelfile`` into a DNN wrapping ``network`` and predict.

    Returns:
        Whatever ``model.predict`` returns for ``images``.
    """
    model = tflearn.DNN(network)
    model.load(modelfile)
    return model.predict(images)
if __name__ == '__main__':
    # dataset.pkl must already exist; to (re)build it from the image list,
    # run this line once first:
    # image, label = load_data('train.txt', 5)
    X, Y = load_from_pkl('dataset.pkl')
    net = create_vggnet(5)
    train(net, X, Y)
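我试过用 numpy 的 reshape 来改变数组的形状。 但是,下面的错误仍然反复出现。
报错如下: ValueError: Cannot feed value of shape (64, 224, 224) for Tensor u'input/X:0', which has shape '(?, 224, 224, 3)'(即:无法把形状为 (64, 224, 224) 的值提供给形状为 (?, 224, 224, 3) 的张量 input/X:0)
问题出在哪里?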
问题在于你的输入数据的形状——它与输入层不匹配。
输入层定义在 create_vggnet() 中:
def create_vggnet(num_classes):
    # Building 'VGGNet'
    # The input layer expects batches of 224x224 samples with 3 channels.
    network = input_data(shape=[None, 224, 224, 3], name='input')
    # ... (excerpt: the remaining layers are not shown here)
所以网络期望的输入是 None(即任意数量)个形状为 (224, 224, 3) 的样本,也就是 224x224 的 RGB(3 通道)图像;而你传入的是 64(你的批量大小)个 224x224 的数组。
有两种修复方法:
1)(可能更浪费)——将图像扩展为 RGB。
也就是说,在把图片转成 'L'(亮度,即灰度)并完成二值化之后,先把它转成 RGB,然后你可以再把它转换成 'F'
(参见:http://effbot.org/imagingbook/image.htm 以及 Stack Overflow 问题 “How do I save a mode 'F' image? (Python/PIL)”)
def binarization(in_img, threshold):
    """Binarize ``in_img`` and return it as a 3-channel RGB image.

    The image is converted to 8-bit grayscale ('L'); pixels strictly greater
    than ``threshold`` become 255 and all others become 0. The result is then
    expanded to 'RGB' so that converting it to an array yields 3 channels.
    """
    gray = in_img.convert('L')
    # point() applies the mapping to every pixel in one call, replacing the
    # slow per-pixel getpixel()/putpixel() double loop.
    binary = gray.point(lambda value: 255 if value > threshold else 0)
    # Stay in 'RGB': mode 'F' is single-band, so a further convert('F') would
    # collapse the image back to one channel and the array would again be
    # (H, W). The float conversion is done later by
    # np.asarray(..., dtype="float32"), which gives (H, W, 3).
    return binary.convert('RGB')
2)(更节省,但你会稍微改动网络(仅输入层)——所以可以说它 "isn't VGG 16 anymore",即不再是严格意义上的 VGG 16)你可以把输入层改为单通道。
def create_vggnet(num_classes):
    # Building 'VGGNet'
    # Single-channel input: each sample must have shape (224, 224, 1).
    network = input_data(shape=[None, 224, 224, 1], name='input')
    # ... (excerpt: the remaining layers are not shown here)
不幸的是,shape=[None, 224, 224] 不起作用(会出现与 "The Tensor needs to be 4D" 相关的错误)。因此,单个输入样本的形状必须是 (224, 224, 1)。
所以你需要给图像数组增加一个额外的维度:
def pil_to_nparray(pil_image):
    """Return the image as a float32 array with an extra axis at position 2.

    A 2-D (height, width) image therefore becomes (height, width, 1),
    whatever its size.
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype="float32")
    return np.expand_dims(pixels, axis=2)
或(甚至更好):
def pil_to_nparray(pil_image):
    """Return the image as a float32 array reshaped to (224, 224, 1).

    The target shape is hard-coded, so this only works when the image holds
    exactly 224 * 224 values; any other size makes ``reshape`` raise.
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype="float32")
    return pixels.reshape((224, 224, 1))
(后一个版本看起来更直接,一眼就能看出它的作用)
但它仅在输入图像为 224x224 时有效,而 expand_dims 对任何尺寸的图像都能添加这个额外的维度。