Error with .map Function Tensorflow Dataset
I'm trying to import a directory full of images into TensorFlow and then use it with Keras Tuner. The problem is that Keras Tuner needs the data split into images and labels. I'm following the guide on the TensorFlow website, and this is the code I have so far:
Note: I'm using the COCO dataset, which means each image has multiple labels. Maybe that is the problem.
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as display
from PIL import Image, ImageSequence
import PIL
import os
import pathlib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import datetime
import kerastuner as kt
tf.compat.v1.enable_eager_execution()
tf.executing_eagerly()
epochs = 50
steps_per_epoch = 10
batch_size = 10
IMG_HEIGHT = 150
IMG_WIDTH = 150
train_dir = "Data/Train"
test_dir = "Data/Val"
train_data_gen = tf.keras.preprocessing.image_dataset_from_directory(
train_dir, labels='inferred', label_mode='int',
class_names=None, color_mode='rgb', batch_size=batch_size, image_size=(IMG_HEIGHT,
IMG_WIDTH), shuffle=True, seed=None,
validation_split=None, subset=None,
interpolation='bilinear', follow_links=False
)
test_data_gen = tf.keras.preprocessing.image_dataset_from_directory(
test_dir, labels='inferred', label_mode='int',
class_names=None, color_mode='rgb', batch_size=batch_size, image_size=(IMG_HEIGHT,
IMG_WIDTH), shuffle=True, seed=None,
validation_split=None, subset=None,
interpolation='bilinear', follow_links=False
)
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1. / 255)
train_data_gen = train_data_gen.map(lambda x, y: (normalization_layer(x), y))
test_data_gen = test_data_gen.map(lambda x, y: (normalization_layer(x), y))
AUTOTUNE = tf.data.AUTOTUNE
train_data_gen = train_data_gen.cache().prefetch(buffer_size=AUTOTUNE)
test_data_gen = test_data_gen.cache().prefetch(buffer_size=AUTOTUNE)
train_dir_PATH = pathlib.Path(train_dir)
class_names = np.array(sorted([item.name for item in train_dir_PATH.glob('*') if item.name != "LICENSE.txt"]))
print(class_names)
def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # The second to last is the class-directory
    one_hot = parts[-2] == class_names
    # Integer encode the label
    return tf.argmax(one_hot)
def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # resize the image to the desired size
    return tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label
train_ds_map = train_data_gen.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds_map = test_data_gen.map(process_path, num_parallel_calls=AUTOTUNE)
for image, label in train_data_gen.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy())
# train_image_generator = ImageDataGenerator(rescale=1. / 255)
#
# test_image_generator = ImageDataGenerator(rescale=1. / 255)
#
# train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
# directory=train_dir,
# shuffle=True,
# target_size=(IMG_HEIGHT, IMG_WIDTH),
# class_mode='sparse')
#
# test_data_gen = test_image_generator.flow_from_directory(batch_size=batch_size,
# directory=test_dir,
# shuffle=True,
# target_size=(IMG_HEIGHT, IMG_WIDTH),
# class_mode='sparse')
def configure_for_performance(ds):
    ds = ds.cache()
    ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds
train_data_gen = configure_for_performance(train_data_gen)
test_data_gen = configure_for_performance(test_data_gen)
def model_builder(hp):
    model = keras.Sequential()
    model.add(Conv2D(265, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
    model.add(MaxPooling2D())
    model.add(Conv2D(64, 3, padding='same', activation='relu'))
    model.add(MaxPooling2D())
    model.add(Conv2D(32, 3, padding='same', activation='relu'))
    model.add(MaxPooling2D())
    model.add(Flatten())
    model.add(keras.layers.Dense(256, activation="relu"))
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(keras.layers.Dense(hp_units, activation="relu"))
    model.add(keras.layers.Dense(80, activation="softmax"))
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['sparse_top_k_categorical_accuracy'])
    return model
tuner = kt.Hyperband(model_builder,
objective='top_k_categorical_accuracy',
max_epochs=30,
factor=3,
directory='Hypertuner_Dir',
project_name='AIOS')
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
for image, label in train_data_gen.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy())
tuner.search(image_batch_n, labels_batch_n, epochs=50, validation_split=0.2, callbacks=[stop_early])
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")
model = tuner.hypermodel.build(best_hps)
# model.summary()
# tf.keras.utils.plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True, rankdir='TB')
checkpoint_path = "training/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
save_weights_only=True,
verbose=1)
os.system("rm -r logs")
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
history = model.fit(train_data_gen,steps_per_epoch=steps_per_epoch,epochs=epochs,validation_data=test_data_gen,validation_steps=10,callbacks=[cp_callback, tensorboard_callback])
#history = model.fit(train_ds, epochs=epochs,validation_split=0.2)
val_acc_per_epoch = history.history['top_k_categorical_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))
hypermodel = tuner.hypermodel.build(best_hps)
# Retrain the model
history = hypermodel.fit(train_data_gen,steps_per_epoch=steps_per_epoch,epochs=epochs,validation_data=test_data_gen,validation_steps=10,callbacks=[cp_callback, tensorboard_callback])
# model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
# model.save('model.h5', include_optimizer=True)
test_loss, test_acc = model.evaluate(test_data_gen)
print("Tested Acc: ", test_acc)
print("Tested Acc: ", test_acc*100, "%")
When I try to run train_ds_map = train_data_gen.map(process_path, num_parallel_calls=AUTOTUNE), it throws the error shown further below.
My data is structured as follows:
Train
- Class 1
- Class 2
- Class 3
- Class ....
- Class 80
Test
- Class 1
- Class 2
- Class 3
- Class ....
- Class 80
Traceback (most recent call last):
File "AIOS_Hypertune.py", line 90, in <module>
train_ds_map = train_data_gen.map(process_path, num_parallel_calls=AUTOTUNE)
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1807, in map
return ParallelMapDataset(
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 4242, in __init__
self._map_func = StructuredFunctionWrapper(
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3525, in __init__
self._function = wrapper_fn.get_concrete_function()
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3051, in get_concrete_function
graph_function = self._get_concrete_function_garbage_collected(
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3019, in _get_concrete_function_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3518, in wrapper_fn
ret = _wrapper_helper(*args)
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3453, in _wrapper_helper
ret = autograph.tf_convert(func, ag_ctx)(*nested_args)
File "/home/dragonos/anaconda3/envs/AIOS/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 670, in wrapper
raise e.ag_error_metadata.to_exception(e)
TypeError: in user code:
TypeError: tf__process_path() takes 1 positional argument but 2 were given
How do I fix this error? The only thing I can think of is that it is because each of my images has multiple classes.
You need to create the training and test splits like this:
train_data_gen = tf.keras.preprocessing.image_dataset_from_directory(
train_dir, labels='inferred', label_mode='int',
class_names=None, color_mode='rgb', batch_size=batch_size, image_size=(IMG_HEIGHT,IMG_WIDTH), shuffle=True, seed=123,
validation_split=Val_Split, subset="training",
interpolation='bilinear', follow_links=False
)
test_data_gen = tf.keras.preprocessing.image_dataset_from_directory(
train_dir, labels='inferred', label_mode='int',
class_names=None, color_mode='rgb', batch_size=batch_size, image_size=(IMG_HEIGHT,IMG_WIDTH), shuffle=True, seed=123,
validation_split=Val_Split, subset="validation",
interpolation='bilinear', follow_links=False
)
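(Here Val_Split is assumed to be a fraction such as 0.2, and passing the same seed to both calls keeps the training and validation subsets from overlapping.)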
.........
list_ds = tf.data.Dataset.list_files(str(train_dir_PATH/'*/*'), shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)
val_size = int(image_count * Val_Split)
train_data_gen = list_ds.skip(val_size)
test_data_gen = list_ds.take(val_size)
This worked for me. Just follow this guide: https://www.tensorflow.org/tutorials/load_data/images
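For reference, here is a minimal sketch of the step that follows in that guide, reusing process_path, configure_for_performance, and AUTOTUNE from the question's code. Because list_ds yields plain file-path strings rather than the (image, label) batches produced by image_dataset_from_directory, the one-argument process_path can be mapped over these splits without triggering the TypeError:
# Sketch based on https://www.tensorflow.org/tutorials/load_data/images,
# assuming the list_ds-based train_data_gen / test_data_gen splits above
# and process_path / configure_for_performance from the question's code.
train_ds = train_data_gen.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = test_data_gen.map(process_path, num_parallel_calls=AUTOTUNE)
train_ds = configure_for_performance(train_ds)
val_ds = configure_for_performance(val_ds)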