(修改)Tensorflow tflearn 二值图像学习题

(Modified) TensorFlow / tflearn: question about training on binarized images

我想用 TensorFlow(tflearn)训练经 PIL 二值化处理后的指纹图像。但在用二值化图像进行训练时,输入数据的形状不正确。

from __future__ import division, print_function, absolute_import
import pickle
import numpy as np
from PIL import Image
import tflearn
import tensorflow as tf
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression


def load_image(img_path):
    """Open the image file at ``img_path`` with PIL and return the image object."""
    return Image.open(img_path)


def resize_image(in_image, new_width, new_height, out_image=None,
                 resize_mode=Image.LANCZOS):
    """Resize an image to ``new_width`` x ``new_height`` and optionally save it.

    Args:
        in_image: PIL image to resize.
        new_width: Target width in pixels.
        new_height: Target height in pixels.
        out_image: Optional output path; when truthy, the resized image is
            also written there with ``save``.
        resize_mode: Resampling filter handed to ``resize``. The default is
            ``Image.LANCZOS``, the filter that ``Image.ANTIALIAS`` used to
            alias; ``ANTIALIAS`` was removed in Pillow 10, so referencing it
            in the signature made this definition fail on current Pillow.

    Returns:
        The image returned by ``in_image.resize``.
    """
    resized = in_image.resize((new_width, new_height), resize_mode)

    if out_image:
        resized.save(out_image)

    return resized


def pil_to_nparray(pil_image):
    """Return the pixel data of ``pil_image`` as a float32 numpy array.

    ``load()`` is called first so the pixel data is read before numpy
    accesses the image. No axis is added or removed by this function.
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype="float32")
    return pixels


def binarization(in_img, threshold):
    """Binarize an image: pixels above ``threshold`` become 255, all others 0.

    Args:
        in_img: PIL image; it is converted to 8-bit grayscale (mode 'L')
            before thresholding.
        threshold: Grayscale cut-off. The comparison is strict (``>``), so a
            pixel equal to the threshold maps to 0.

    Returns:
        A new mode 'F' (32-bit float, single channel) image whose pixel
        values are 0 or 255.
    """
    gray = in_img.convert('L')
    # point() applies the mapping to the whole image in a single call, which
    # replaces one getpixel/putpixel round-trip per pixel with the same result.
    binary = gray.point(lambda value: 255 if value > threshold else 0)
    return binary.convert('F')


def load_data(datafile, num_clss, save=True, save_path='dataset.pkl'):
    """Build the image / one-hot label lists described by a list file.

    Each non-blank line of ``datafile`` holds an image path followed by an
    integer class index, separated by whitespace. Every image is loaded,
    binarized with a threshold of 128, resized to 224x224 and converted to a
    numpy array.

    Args:
        datafile: Path of the text file listing ``<image path> <class index>``.
        num_clss: Number of classes, i.e. the length of each one-hot label.
        save: When true, pickle ``(images, labels)`` to ``save_path``.
        save_path: Destination of the pickled dataset.

    Returns:
        A tuple ``(images, labels)`` of two lists in file order.
    """
    labels = []
    images = []
    # The context manager closes the list file even if an image fails to load.
    with open(datafile, 'r') as train_list:
        for line in train_list:
            # split() tolerates tabs and repeated spaces between the fields.
            tmp = line.split()
            if not tmp:
                # Blank line (e.g. trailing newline at end of file): skip it.
                continue
            fpath = tmp[0]
            print(fpath)
            img = load_image(fpath)
            img = binarization(img, 128)
            img = resize_image(img, 224, 224)
            np_img = pil_to_nparray(img)
            images.append(np_img)

            index = int(tmp[1])
            label = np.zeros(num_clss)
            label[index] = 1
            labels.append(label)
    if save:
        # Close the pickle file explicitly so the data is flushed to disk.
        with open(save_path, 'wb') as out_file:
            pickle.dump((images, labels), out_file)

    return images, labels


def load_from_pkl(dataset_file):
    """Load the ``(X, Y)`` pair previously pickled to ``dataset_file``.

    Args:
        dataset_file: Path of a pickle file containing a two-item tuple.

    Returns:
        The tuple ``(X, Y)`` read from the file.
    """
    # NOTE: pickle can execute arbitrary code while loading; only open
    # dataset files that you created yourself.
    with open(dataset_file, 'rb') as in_file:
        X, Y = pickle.load(in_file)
    return X, Y


def create_vggnet(num_classes):
    """Assemble the VGG-style network and wrap it in a regression layer.

    The graph is: an input layer declared as ``[None, 224, 224, 3]``, five
    convolution blocks (3x3 filters, stride 1, ReLU) each closed by a 2x2
    max-pool with stride 2, two 4096-unit fully connected layers each followed
    by ``dropout(..., 0.5)``, and a softmax layer with ``num_classes`` units.

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        The tflearn regression layer (adam, categorical cross-entropy,
        learning rate 0.001) built on top of the network.
    """
    # Building 'VGGNet'
    # (number of filters, number of stacked conv layers) for each block.
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    net = input_data(shape=[None, 224, 224, 3], name='input')
    for n_filters, n_convs in conv_blocks:
        for _ in range(n_convs):
            net = conv_2d(net, n_filters, filter_size=3, strides=1,
                          activation='relu')
        net = max_pool_2d(net, kernel_size=2, strides=2)

    for _ in range(2):
        net = fully_connected(net, 4096, activation='relu')
        net = dropout(net, 0.5)
    net = fully_connected(net, num_classes, activation='softmax')

    return regression(net, optimizer='adam',
                      loss='categorical_crossentropy',
                      learning_rate=0.001)


def train(network, X, Y):
    """Fit ``network`` on ``X`` / ``Y`` and save the resulting model.

    Checkpoints go to 'model_vgg' (at most one is kept) and TensorBoard logs
    to 'output'. After fitting, the model is saved as 'model_save.model'.

    Args:
        network: Network returned by ``create_vggnet``.
        X: Training inputs passed to ``fit``.
        Y: Training targets passed to ``fit``.
    """
    # Training
    dnn = tflearn.DNN(
        network,
        checkpoint_path='model_vgg',
        max_checkpoints=1,
        tensorboard_verbose=2,
        tensorboard_dir='output',
    )
    fit_options = dict(
        n_epoch=100,
        validation_set=0.1,
        shuffle=True,
        show_metric=True,
        batch_size=64,
        snapshot_step=200,
        snapshot_epoch=False,
        run_id='vgg_fingerprint',
    )
    dnn.fit(X, Y, **fit_options)
    dnn.save('model_save.model')


def predict(network, modelfile, images):
    """Load saved weights from ``modelfile`` and run prediction on ``images``.

    Args:
        network: Network definition to wrap in a ``tflearn.DNN``.
        modelfile: Path passed to ``DNN.load``.
        images: Inputs passed to ``DNN.predict``.

    Returns:
        Whatever ``DNN.predict`` returns for ``images``.
    """
    dnn = tflearn.DNN(network)
    dnn.load(modelfile)
    predictions = dnn.predict(images)
    return predictions


if __name__ == '__main__':
    dataset_file = 'dataset.pkl'
    class_count = 5

    # Uncomment to rebuild the dataset from the list file:
    # image, label = load_data('train.txt', 5)
    X, Y = load_from_pkl(dataset_file)
    net = create_vggnet(class_count)
    train(net, X, Y)

我尝试过用 numpy 的 reshape 改变数组的形状,但仍然反复出现下面的错误。

报错如下:ValueError: Cannot feed value of shape (64, 224, 224) for Tensor u'input/X:0', which has shape '(?, 224, 224, 3)'(即:无法把形状为 (64, 224, 224) 的值提供给形状为 (?, 224, 224, 3) 的张量 u'input/X:0')

有什么问题?

问题在于您的输入形状 - 它与输入层不匹配。

输入层定义在create_vggnet():

def create_vggnet(num_classes):
    # Building 'VGGNet'
    # Excerpt showing only the input layer: the declared shape ends in 3, so
    # every sample fed to 'input' must be shaped 224 x 224 x 3.
    network = input_data(shape=[None, 224, 224, 3], name='input')

也就是说,网络期望的输入形状是 (None, 224, 224, 3):None 表示任意的批量大小,每个样本是 224x224 的 RGB(3 通道)图像。而你实际传入的数据形状是 (64, 224, 224),即 64(你的批量大小)个 224x224 的图像,缺少最后的通道维度。

有两个修复:

1)(可能更浪费)- 将图像扩展为 RGB。

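所以,在把图片转成 'L'(亮度,也就是灰度)并完成二值化之后,要把它转回 'RGB'。注意不要再转成 'F':PIL 的 'F' 模式是单通道的 32 位浮点图像,转换后又只剩一个通道;转换为 float32 的工作交给 pil_to_nparray 中的 np.asarray(..., dtype="float32") 即可。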

(参见:http://effbot.org/imagingbook/image.htm 以及 Stack Overflow 上的问题 “How do I save a mode 'F' image? (Python/PIL)”)

def binarization(in_img, threshold):
    """Binarize an image and return it as a 3-channel RGB image.

    Args:
        in_img: PIL image; it is converted to 8-bit grayscale (mode 'L')
            before thresholding.
        threshold: Grayscale cut-off. The comparison is strict (``>``), so a
            pixel equal to the threshold maps to 0.

    Returns:
        A new mode 'RGB' image whose pixels are black or white.
    """
    gray = in_img.convert('L')
    # point() applies the mapping to the whole image in a single call instead
    # of one getpixel/putpixel round-trip per pixel.
    binary = gray.point(lambda value: 255 if value > threshold else 0)
    # Stop at 'RGB'. Mode 'F' is a single-channel float image, so converting
    # to it afterwards would drop back to one channel and the array would be
    # (H, W) again instead of (H, W, 3). The float32 cast is done later by
    # np.asarray(..., dtype="float32").
    return binary.convert('RGB')

2)(浪费更少,但会稍微改动你的网络——只改输入层——所以可以说它“不再是 VGG-16 了”)把输入层改为单通道。

def create_vggnet(num_classes):
    # Building 'VGGNet'
    # Excerpt showing only the input layer: the declared shape now ends in 1,
    # so every sample fed to 'input' must be shaped 224 x 224 x 1.
    network = input_data(shape=[None, 224, 224, 1], name='input')

不幸的是,shape=[None, 224, 224] 行不通(会报与 "The Tensor needs to be 4D" 相关的错误)。因此,单个输入样本的形状必须是 (224, 224, 1)。

所以你需要让图像有一个额外的维度:

def pil_to_nparray(pil_image):
    """Return the image as a float32 array with a new axis inserted at index 2.

    For a 2-D (single-channel) image of any size this turns (H, W) into
    (H, W, 1).
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype="float32")
    return np.expand_dims(pixels, axis=2)

或(甚至更好):

def pil_to_nparray(pil_image):
    """Return the image as a float32 array reshaped to (224, 224, 1).

    The target shape is hard-coded, so the reshape raises unless the image
    holds exactly 224 * 224 values.
    """
    pil_image.load()
    pixels = np.asarray(pil_image, dtype="float32")
    return pixels.reshape((224, 224, 1))

(后一个版本看起来更直接,一眼就能看出它的作用。)但它只在输入图像恰好是 224x224 时有效,而 expand_dims 对任意尺寸的图像都能添加这个额外的维度。