如何以 TensorFlow 的 protobuf 格式保存和读取可变大小的图像
How can I save and read variable size images in TensorFlow's protobuf format
我正在尝试使用以下代码以 TensorFlow 的 protobuf 格式编写可变大小的图像:
# Store the raw pixel buffer as a single bytes feature.
# ndarray.tobytes() replaces the deprecated tostring() alias (removed in
# NumPy 2.0) and already emits the data in C (row-major) order, so an
# explicit flatten() beforehand is not needed.
img_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[img.tobytes()]))
# Define how the sequence length is stored
seq_len_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=[seq_len]))
# Define how the label list is stored
label_list_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=label_list))
# Define the feature dictionary that defines how the data is stored
feature = {
    IMG_FEATURE_NAME: img_feature,
    SEQ_LEN_FEATURE_NAME: seq_len_feature,
    LABEL_LIST_FEATURE_NAME: label_list_feature,
}
# Create an example object to store
example = tf.train.Example(
    features=tf.train.Features(feature=feature))
我保存的图像 img 高度固定,但长度(即宽度)可变。
现在如果我想用下面的代码解析这张图片:
# Parsing spec: the image is one serialized byte string, the sequence length
# is a single int64 value, and the label list holds a variable number of
# int64 values.
features_dict = {
    IMG_FEATURE_NAME: tf.FixedLenFeature([], tf.string),
    SEQ_LEN_FEATURE_NAME: tf.FixedLenFeature([1], tf.int64),
    LABEL_LIST_FEATURE_NAME: tf.VarLenFeature(tf.int64),
}
features = tf.parse_single_example(serialized_example, features=features_dict)

# Decode the byte string to uint8 pixels and restore the 2-D layout.
# The first dimension is self.img_shape; -1 lets the second one be inferred
# at run time, so it stays statically unknown.
img = tf.reshape(
    tf.decode_raw(features[IMG_FEATURE_NAME], tf.uint8),
    (self.img_shape, -1))
seq_len = tf.cast(features[SEQ_LEN_FEATURE_NAME], tf.int32)
# The parsed label list is cast to int32 as well
label_list = tf.cast(features[LABEL_LIST_FEATURE_NAME], tf.int32)
我收到以下错误:
ValueError: All shapes must be fully defined: [TensorShape([Dimension(28), Dimension(None)]), TensorShape([Dimension(1)]), TensorShape([Dimension(3)])]
有没有办法存储可变大小的图像(在我的例子中,更具体地说是可变宽度),并用 TFRecordReader 读取它们?
首先,我无法重现错误。以下代码工作正常:
import tensorflow as tf
import numpy as np

# Round-trip check: serialize a random uint8 image into a tf.train.Example,
# parse it back inside a graph, and compare it with the original array.
image_height = 100
img = np.random.randint(low=0, high=255, size=(image_height, 200), dtype='uint8')
IMG_FEATURE_NAME = 'image/raw'

with tf.Graph().as_default():
    # tobytes() replaces the deprecated ndarray.tostring() alias and already
    # emits the data in C (row-major) order, so flatten() is not needed.
    img_feature = tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[img.tobytes()]))
    feature = {IMG_FEATURE_NAME: img_feature}
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    serialized_example = example.SerializeToString()

    features_dict = {IMG_FEATURE_NAME: tf.FixedLenFeature([], tf.string)}
    features = tf.parse_single_example(serialized_example, features=features_dict)
    img_tf = tf.decode_raw(features[IMG_FEATURE_NAME], tf.uint8)
    # Only the height is fixed; -1 lets the width be inferred at run time.
    img_tf = tf.reshape(img_tf, (image_height, -1))

    with tf.Session() as sess:
        img_np = sess.run(img_tf)

    print(img_np)
    print('Images are identical: %s' % (img == img_np).all())
它输出:
Images are identical: True
其次,我建议将图像以 PNG 编码(而不是 RAW 原始字节)存储,并使用 tf.VarLenFeature 配合 tf.image.decode_png 来读取。这样可以节省大量存储空间,并且天然支持可变大小的图像。
我最终能够使用以下代码创建 protobuf 数据文件:
# Encode the image as PNG; the first return value of imencode is discarded
# here and only the encoded buffer is kept.
_, img_png = cv2.imencode('.png', img)
# tobytes() replaces the deprecated ndarray.tostring() alias (removed in
# NumPy 2.0).
img_png = img_png.tobytes()
# One bytes feature per label, in sequence order
label_list_feature = [
    tf.train.Feature(bytes_list=tf.train.BytesList(value=[label]))
    for label in label_list]
img_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[img_png]))
# Define feature for sequence length
seq_len_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=[seq_len]))
# Feature list that contains list of labels
feature_list = {
    LABEL_LIST_FEATURE_NAME: tf.train.FeatureList(
        feature=label_list_feature),
}
# Context that contains sequence length and image
context = tf.train.Features(feature={
    IMG_FEATURE_NAME: img_feature,
    SEQ_LEN_FEATURE_NAME: seq_len_feature,
})
feature_lists = tf.train.FeatureLists(feature_list=feature_list)
# Combine the label sequence with the context (image and sequence length)
example = tf.train.SequenceExample(
    feature_lists=feature_lists,
    context=context)
以及从 protobuf 中读取的以下代码:
# Sequence length is a context feature
context_features = {
IMG_FEATURE_NAME: tf.FixedLenFeature([], dtype=tf.string),
SEQ_LEN_FEATURE_NAME: tf.FixedLenFeature([], dtype=tf.int64)
}
# Image and target word is a sequence feature
sequence_features = {
LABEL_LIST_FEATURE_NAME: tf.FixedLenSequenceFeature(
[], dtype=tf.string)
}
# Parse the example
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features
)
seq_len = tf.cast(context_parsed[SEQ_LEN_FEATURE_NAME], tf.int32)
# Process the image
img = context_parsed[IMG_FEATURE_NAME]
img = tf.image.decode_png(img, dtype=tf.uint8, channels=nb_channels)
img = tf.reshape(img, (img_height, -1, nb_channels))
labels = sequence_parsed[LABEL_LIST_FEATURE_NAME]
return img, seq_len, labels
注意:在这个例子中,我将我的整数标签列表更改为字符串标签列表(在我的例子中更自然)。我还将图像存储为 png 字节字符串。
我正在尝试使用以下代码以 TensorFlow 的 protobuf 格式编写可变大小的图像:
# Store the raw pixel buffer as a single bytes feature.
# ndarray.tobytes() replaces the deprecated tostring() alias (removed in
# NumPy 2.0) and already emits the data in C (row-major) order, so an
# explicit flatten() beforehand is not needed.
img_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[img.tobytes()]))
# Define how the sequence length is stored
seq_len_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=[seq_len]))
# Define how the label list is stored
label_list_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=label_list))
# Define the feature dictionary that defines how the data is stored
feature = {
    IMG_FEATURE_NAME: img_feature,
    SEQ_LEN_FEATURE_NAME: seq_len_feature,
    LABEL_LIST_FEATURE_NAME: label_list_feature,
}
# Create an example object to store
example = tf.train.Example(
    features=tf.train.Features(feature=feature))
我保存的图像 img 高度固定,但长度(即宽度)可变。
现在如果我想用下面的代码解析这张图片:
# Parsing spec: the image is one serialized byte string, the sequence length
# is a single int64 value, and the label list holds a variable number of
# int64 values.
features_dict = {
    IMG_FEATURE_NAME: tf.FixedLenFeature([], tf.string),
    SEQ_LEN_FEATURE_NAME: tf.FixedLenFeature([1], tf.int64),
    LABEL_LIST_FEATURE_NAME: tf.VarLenFeature(tf.int64),
}
features = tf.parse_single_example(serialized_example, features=features_dict)

# Decode the byte string to uint8 pixels and restore the 2-D layout.
# The first dimension is self.img_shape; -1 lets the second one be inferred
# at run time, so it stays statically unknown.
img = tf.reshape(
    tf.decode_raw(features[IMG_FEATURE_NAME], tf.uint8),
    (self.img_shape, -1))
seq_len = tf.cast(features[SEQ_LEN_FEATURE_NAME], tf.int32)
# The parsed label list is cast to int32 as well
label_list = tf.cast(features[LABEL_LIST_FEATURE_NAME], tf.int32)
我收到以下错误:
ValueError: All shapes must be fully defined: [TensorShape([Dimension(28), Dimension(None)]), TensorShape([Dimension(1)]), TensorShape([Dimension(3)])]
有没有办法存储可变大小的图像(在我的例子中,更具体地说是可变宽度),并用 TFRecordReader 读取它们?
首先,我无法重现错误。以下代码工作正常:
import tensorflow as tf
import numpy as np

# Round-trip check: serialize a random uint8 image into a tf.train.Example,
# parse it back inside a graph, and compare it with the original array.
image_height = 100
img = np.random.randint(low=0, high=255, size=(image_height, 200), dtype='uint8')
IMG_FEATURE_NAME = 'image/raw'

with tf.Graph().as_default():
    # tobytes() replaces the deprecated ndarray.tostring() alias and already
    # emits the data in C (row-major) order, so flatten() is not needed.
    img_feature = tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[img.tobytes()]))
    feature = {IMG_FEATURE_NAME: img_feature}
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    serialized_example = example.SerializeToString()

    features_dict = {IMG_FEATURE_NAME: tf.FixedLenFeature([], tf.string)}
    features = tf.parse_single_example(serialized_example, features=features_dict)
    img_tf = tf.decode_raw(features[IMG_FEATURE_NAME], tf.uint8)
    # Only the height is fixed; -1 lets the width be inferred at run time.
    img_tf = tf.reshape(img_tf, (image_height, -1))

    with tf.Session() as sess:
        img_np = sess.run(img_tf)

    print(img_np)
    print('Images are identical: %s' % (img == img_np).all())
它输出:
Images are identical: True
其次,我建议将图像以 PNG 编码(而不是 RAW 原始字节)存储,并使用 tf.VarLenFeature 配合 tf.image.decode_png 来读取。这样可以节省大量存储空间,并且天然支持可变大小的图像。
我最终能够使用以下代码创建 protobuf 数据文件:
# Encode the image as PNG; the first return value of imencode is discarded
# here and only the encoded buffer is kept.
_, img_png = cv2.imencode('.png', img)
# tobytes() replaces the deprecated ndarray.tostring() alias (removed in
# NumPy 2.0).
img_png = img_png.tobytes()
# One bytes feature per label, in sequence order
label_list_feature = [
    tf.train.Feature(bytes_list=tf.train.BytesList(value=[label]))
    for label in label_list]
img_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[img_png]))
# Define feature for sequence length
seq_len_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=[seq_len]))
# Feature list that contains list of labels
feature_list = {
    LABEL_LIST_FEATURE_NAME: tf.train.FeatureList(
        feature=label_list_feature),
}
# Context that contains sequence length and image
context = tf.train.Features(feature={
    IMG_FEATURE_NAME: img_feature,
    SEQ_LEN_FEATURE_NAME: seq_len_feature,
})
feature_lists = tf.train.FeatureLists(feature_list=feature_list)
# Combine the label sequence with the context (image and sequence length)
example = tf.train.SequenceExample(
    feature_lists=feature_lists,
    context=context)
以及从 protobuf 中读取的以下代码:
# Sequence length is a context feature
context_features = {
IMG_FEATURE_NAME: tf.FixedLenFeature([], dtype=tf.string),
SEQ_LEN_FEATURE_NAME: tf.FixedLenFeature([], dtype=tf.int64)
}
# Image and target word is a sequence feature
sequence_features = {
LABEL_LIST_FEATURE_NAME: tf.FixedLenSequenceFeature(
[], dtype=tf.string)
}
# Parse the example
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features
)
seq_len = tf.cast(context_parsed[SEQ_LEN_FEATURE_NAME], tf.int32)
# Process the image
img = context_parsed[IMG_FEATURE_NAME]
img = tf.image.decode_png(img, dtype=tf.uint8, channels=nb_channels)
img = tf.reshape(img, (img_height, -1, nb_channels))
labels = sequence_parsed[LABEL_LIST_FEATURE_NAME]
return img, seq_len, labels
注意:在这个例子中,我将我的整数标签列表更改为字符串标签列表(在我的例子中更自然)。我还将图像存储为 png 字节字符串。