ValueError: no SavedModel bundles found! when trying to deploy a TF2.0 model to SageMaker
I am trying to deploy a TF2.0 model to SageMaker. So far I have managed to train the model and save it to an S3 bucket, but when I call the .deploy() method I get the following error in CloudWatch:
ValueError: no SavedModel bundles found!
Here is my training script:
### Code to add in a tensorflow_estimator.py file
import argparse
import os
import pathlib
import tensorflow as tf
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--learning_rate', type=float, default=0.1)

    # Data, model, and output directories
    parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))

    args, _ = parser.parse_known_args()
    print("##### ARGS ##### \n{}".format(args))

    # Get files
    path = pathlib.Path(args.train)

    # Print out folder content
    for item in path.iterdir():
        print("##### DIRECTORIES ##### \n{}".format(item))

    # Get all images
    all_images = list(path.glob("*/*"))
    all_image_paths = [str(path) for path in list(path.glob("*/*"))]

    # Transform images into tensors
    def preprocess_and_load_images(path):
        image = tf.io.read_file(path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, [192, 192])
        return image

    # Apply preprocessing function
    ds_paths = tf.data.Dataset.from_tensor_slices(all_image_paths)
    ds_images = ds_paths.map(preprocess_and_load_images)

    # Map Labels
    labels = []
    for data in path.iterdir():
        if data.is_dir():
            labels += [data.name]

    labels_index = {}
    for i, label in enumerate(labels):
        labels_index[label] = i
    print("##### Label Index ##### \n{}".format(labels_index))

    all_image_labels = [labels_index[path.parent.name] for path in list(path.glob("*/*"))]

    # Create a tf Dataset
    labels_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)

    # Zip train and labeled dataset
    full_ds = tf.data.Dataset.zip((ds_images, labels_ds))

    # Shuffle Dataset and batch it
    full_ds = full_ds.shuffle(len(all_images)).batch(args.batch_size)

    # Create a pre-trained model
    base_model = tf.keras.applications.InceptionV3(input_shape=(192, 192, 3),
                                                   include_top=False,
                                                   weights="imagenet")
    base_model.trainable = False

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(len(labels), activation="softmax")
    ])

    initial_learning_rate = args.learning_rate
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=1000,
        decay_rate=0.96,
        staircase=True)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    # Train the model
    model.fit(full_ds, epochs=args.epochs)

    # Save the model
    model.save(os.path.join(args.model_dir, "tf_model"), save_format="tf")


def model_fn(model_dir):
    classifier = tf.keras.models.load_model(os.path.join(model_dir, "tf_model"))
    return classifier
And here is the code I run from Colab:
from sagemaker.tensorflow import TensorFlow
tf_estimator = TensorFlow(entry_point='tensorflow_estimator.py',
                          role=role,
                          train_instance_count=1,
                          train_instance_type='ml.m5.large',
                          framework_version='2.0.0',
                          sagemaker_session=sagemaker_session,
                          output_path=s3_output_location,
                          hyperparameters={'epochs': 1,
                                           'batch_size': 30,
                                           'learning_rate': 0.001},
                          py_version='py3')
tf_estimator.fit({"train":train_data})
from sagemaker.tensorflow.serving import Model
model = Model(model_data='s3://path/to/model.tar.gz',
              role=role,
              framework_version="2.0.0",
              sagemaker_session=sagemaker_session)
predictor = model.deploy(initial_instance_count=1, instance_type='ml.m5.large')
I have already looked into this, but there does not actually seem to be a versioning problem inside my model.tar.gz, because its structure is the following:
├── assets
├── saved_model.pb
└── variables
    ├── variables.data-00000-of-00001
    └── variables.index
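For reference, a minimal sketch (assuming the artifact has been downloaded locally as model.tar.gz, which is an assumed file name) to list the archive contents and check whether the SavedModel sits under a numeric version directory:

import tarfile

# List the archive members without extracting, to see whether the
# SavedModel is stored under a numeric version folder (e.g. "tf_model/1/").
with tarfile.open("model.tar.gz", "r:gz") as archive:
    for member in archive.getnames():
        print(member)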
I suspect that the way I define model_fn() in my training script is wrong, but I have no idea what to replace it with. Do you have any ideas?
Thanks a lot for your help!
I ended up modifying my training script as follows:
### Code to add in a tensorflow_estimator.py file
import argparse
import os
import pathlib
import tensorflow as tf
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--learning_rate', type=float, default=0.1)

    # Data, model, and output directories
    parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))

    args, _ = parser.parse_known_args()
    print("##### ARGS ##### \n{}".format(args))

    # Get files
    path = pathlib.Path(args.train)

    # Print out folder content
    for item in path.iterdir():
        print("##### DIRECTORIES ##### \n{}".format(item))

    # Get all images
    all_images = list(path.glob("*/*"))
    all_image_paths = [str(path) for path in list(path.glob("*/*"))]

    # Transform images into tensors
    def preprocess_and_load_images(path):
        image = tf.io.read_file(path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, [192, 192])
        return image

    # Apply preprocessing function
    ds_paths = tf.data.Dataset.from_tensor_slices(all_image_paths)
    ds_images = ds_paths.map(preprocess_and_load_images)

    # Map Labels
    labels = []
    for data in path.iterdir():
        if data.is_dir():
            labels += [data.name]

    labels_index = {}
    for i, label in enumerate(labels):
        labels_index[label] = i
    print("##### Label Index ##### \n{}".format(labels_index))

    all_image_labels = [labels_index[path.parent.name] for path in list(path.glob("*/*"))]

    # Create a tf Dataset
    labels_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)

    # Zip train and labeled dataset
    full_ds = tf.data.Dataset.zip((ds_images, labels_ds))

    # Shuffle Dataset and batch it
    full_ds = full_ds.shuffle(len(all_images)).batch(args.batch_size)

    # Create a pre-trained model
    base_model = tf.keras.applications.InceptionV3(input_shape=(192, 192, 3),
                                                   include_top=False,
                                                   weights="imagenet")
    base_model.trainable = False

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(len(labels), activation="softmax")
    ])

    initial_learning_rate = args.learning_rate
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=1000,
        decay_rate=0.96,
        staircase=True)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    # Train the model
    model.fit(full_ds, epochs=args.epochs)

    # Save the model
    model.save(os.path.join(args.model_dir, "tensorflow_model/1"), save_format="tf")
It turns out that giving the export folder a numeric name is what matters:
# Save the model
model.save(os.path.join(args.model_dir, "tensorflow_model/1"), save_format="tf")
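TensorFlow Serving only loads a SavedModel that sits inside a numeric version subdirectory, which is why the export path ends in /1. If a model has already been trained and uploaded without that layout, a minimal sketch like the one below can repackage the existing archive locally instead of retraining (the file names model.tar.gz and repacked-model.tar.gz are assumptions, not taken from the question):

import tarfile
import tempfile

# Repack an existing artifact so the SavedModel ends up under
# "tensorflow_model/1/", the versioned layout TF Serving looks for.
with tempfile.TemporaryDirectory() as workdir:
    with tarfile.open("model.tar.gz", "r:gz") as archive:
        # The extracted folder contains assets/, variables/ and saved_model.pb.
        archive.extractall(workdir)

    with tarfile.open("repacked-model.tar.gz", "w:gz") as archive:
        # Add everything again, prefixed with a numeric version directory.
        archive.add(workdir, arcname="tensorflow_model/1")

The repacked archive can then be uploaded back to S3 and passed as model_data when creating the serving Model.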