将 Albert 转换为 tflite(Albert 通过 bert-for-tf2 在 Keras 中实现)
Converting Albert to tflite (Albert implemented in Keras via bert-for-tf2)
我很难将 albert(更具体地说,albert_base 模型)转换为 tflite。这是我使用 bert-for-tf2 (https://github.com/kpe/bert-for-tf2) 定义我的模型的代码 <- 顺便感谢这个出色的实现...
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, Flatten, AveragePooling1D
from tensorflow.keras.models import Model
import bert
import sentencepiece as spm
def load_pretrained_albert():
    """Build a Keras model that average-pools the ALBERT-base layer output.

    Uses bert-for-tf2 to fetch the ``albert_base`` model into ``.models``,
    creates a ``BertModelLayer`` from the matching parameters, wraps it in a
    functional Keras model taking ``max_seq_len`` token ids per example,
    averages the layer output over the sequence axis, and finally loads the
    fetched ALBERT weights into the layer.

    Returns:
        The built Keras ``Model`` whose single output is the pooled,
        flattened ALBERT output.
    """
    model_name = "albert_base"
    albert_dir = bert.fetch_tfhub_albert_model(model_name, ".models")
    model_params = bert.albert_params(model_name)
    l_bert = bert.BertModelLayer.from_params(model_params, name="albert")

    # Use the layer in a Keras Model here, and call model.build().
    max_seq_len = 128
    # Token ids are integer indices into the embedding table, so the input is
    # declared as int32 (as in the bert-for-tf2 usage examples), not float32.
    l_input_ids = Input(shape=(max_seq_len,), dtype='int32', name="l_input_ids")
    output = l_bert(l_input_ids)
    # The pooling window spans the whole sequence, leaving one averaged
    # vector per example.
    pooled_output = AveragePooling1D(pool_size=max_seq_len, data_format="channels_last")(output)
    pooled_output = Flatten()(pooled_output)  # pooled_output: [batch_size, embedding_dimension=768]
    model = Model(inputs=[l_input_ids], outputs=[pooled_output])
    model.build(input_shape=(None, max_seq_len))
    # Pretrained weights are loaded after the model has been built.
    bert.load_albert_weights(l_bert, albert_dir)
    return model
但是当我尝试使用以下代码将模型转换为 tflite 时,
# Build a TFLite converter directly from the in-memory Keras model `m`
# (presumably the model returned by load_pretrained_albert() - confirm).
converter = tf.lite.TFLiteConverter.from_keras_model(m)
# Run the conversion; this is the call that raises the ValueError in the
# traceback that follows.
tflite_model = converter.convert()
发生以下错误:
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\lite\python\lite.py", line 405, in convert
self._funcs[0], lower_control_flow=False)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 575, in convert_variables_to_constants_v2
converted_input_indices)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 371, in _construct_concrete_function
new_output_names)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 620, in function_from_graph_def
wrapped_import = wrap_function(_imports_graph_def, [])
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 598, in wrap_function
collections={}),
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 915, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 83, in __call__
return self.call_with_variable_creator_scope(self._fn)(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 89, in wrapped
return fn(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 618, in _imports_graph_def
importer.import_graph_def(graph_def, name="")
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 405, in import_graph_def
producer_op_list=producer_op_list)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 505, in _import_graph_def_internal
raise ValueError(str(e))
ValueError: Input 0 of node model/albert/embeddings/word_embeddings/embedding_lookup was passed float from model/albert/embeddings/word_embeddings/embedding_lookup/Read/ReadVariableOp/resource:0 incompatible with expected resource.
因此,我尝试将模型保存为 saved_model 格式并尝试使用以下代码进行转换:
# Second attempt: build the converter from a model exported in SavedModel
# format ('saved_model_path' is presumably a placeholder for the export dir).
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model_path')
# The conversion fails here too, with the ValueError in the traceback that
# follows.
tflite_model = converter.convert()
但是,再次出现相同的错误消息。
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\lite\python\lite.py", line 405, in convert
self._funcs[0], lower_control_flow=False)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 575, in convert_variables_to_constants_v2
converted_input_indices)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 371, in _construct_concrete_function
new_output_names)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 620, in function_from_graph_def
wrapped_import = wrap_function(_imports_graph_def, [])
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 598, in wrap_function
collections={}),
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 915, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 83, in __call__
return self.call_with_variable_creator_scope(self._fn)(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 89, in wrapped
return fn(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 618, in _imports_graph_def
importer.import_graph_def(graph_def, name="")
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 405, in import_graph_def
producer_op_list=producer_op_list)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 505, in _import_graph_def_internal
raise ValueError(str(e))
ValueError: Input 0 of node StatefulPartitionedCall/model/albert/embeddings/word_embeddings/embedding_lookup was passed float from Func/StatefulPartitionedCall/input/_2:0 incompatible with expected resource.
所以我的理解是,embedding_lookup 收到了浮点数输入,而它期望的数据类型并不是浮点数。但是它期望的数据类型是什么?有什么办法可以查到吗?另外,这个问题有解决方法吗?
如果有人能帮助我将 albert_base 转换为 tflite 格式,我将不胜感激!
有趣的是,我已经为这个问题苦苦挣扎了几个小时,但在我上传问题后我就解决了问题...
所以解决方法是,使用tensorflow 1.15.0版本!
使用 tensorflow2 似乎会导致问题。
但是,我仍然无法将模型转换为 tflite,因为它还不支持 'IdentityN' 操作。我不认为我可以自己编写自定义操作,所以我认为我应该等待 tflite 更新....
关于 "IdentityN" 的错误,您尝试过使用 SELECT_TF_OPS 进行转换吗?
https://www.tensorflow.org/lite/guide/ops_select
使用官方存储库中的 ALBERT 2.0 (TF 2.0) 模型,并将 https://github.com/google-research/ALBERT/blob/master/modeling.py#L516 处的代码改为 tf.gather(tf.identity(embedding_table), input_ids)。
然后像之前一样尝试用 tflite 进行转换。如果仍然不行,请在这里评论。
我很难将 albert(更具体地说,albert_base 模型)转换为 tflite。这是我使用 bert-for-tf2 (https://github.com/kpe/bert-for-tf2) 定义我的模型的代码 <- 顺便感谢这个出色的实现...
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, Flatten, AveragePooling1D
from tensorflow.keras.models import Model
import bert
import sentencepiece as spm
def load_pretrained_albert():
    """Build a Keras model that average-pools the ALBERT-base layer output.

    Uses bert-for-tf2 to fetch the ``albert_base`` model into ``.models``,
    creates a ``BertModelLayer`` from the matching parameters, wraps it in a
    functional Keras model taking ``max_seq_len`` token ids per example,
    averages the layer output over the sequence axis, and finally loads the
    fetched ALBERT weights into the layer.

    Returns:
        The built Keras ``Model`` whose single output is the pooled,
        flattened ALBERT output.
    """
    model_name = "albert_base"
    albert_dir = bert.fetch_tfhub_albert_model(model_name, ".models")
    model_params = bert.albert_params(model_name)
    l_bert = bert.BertModelLayer.from_params(model_params, name="albert")

    # Use the layer in a Keras Model here, and call model.build().
    max_seq_len = 128
    # Token ids are integer indices into the embedding table, so the input is
    # declared as int32 (as in the bert-for-tf2 usage examples), not float32.
    l_input_ids = Input(shape=(max_seq_len,), dtype='int32', name="l_input_ids")
    output = l_bert(l_input_ids)
    # The pooling window spans the whole sequence, leaving one averaged
    # vector per example.
    pooled_output = AveragePooling1D(pool_size=max_seq_len, data_format="channels_last")(output)
    pooled_output = Flatten()(pooled_output)  # pooled_output: [batch_size, embedding_dimension=768]
    model = Model(inputs=[l_input_ids], outputs=[pooled_output])
    model.build(input_shape=(None, max_seq_len))
    # Pretrained weights are loaded after the model has been built.
    bert.load_albert_weights(l_bert, albert_dir)
    return model
但是当我尝试使用以下代码将模型转换为 tflite 时,
# Build a TFLite converter directly from the in-memory Keras model `m`
# (presumably the model returned by load_pretrained_albert() - confirm).
converter = tf.lite.TFLiteConverter.from_keras_model(m)
# Run the conversion; this is the call that raises the ValueError in the
# traceback that follows.
tflite_model = converter.convert()
发生以下错误:
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\lite\python\lite.py", line 405, in convert
self._funcs[0], lower_control_flow=False)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 575, in convert_variables_to_constants_v2
converted_input_indices)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 371, in _construct_concrete_function
new_output_names)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 620, in function_from_graph_def
wrapped_import = wrap_function(_imports_graph_def, [])
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 598, in wrap_function
collections={}),
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 915, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 83, in __call__
return self.call_with_variable_creator_scope(self._fn)(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 89, in wrapped
return fn(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 618, in _imports_graph_def
importer.import_graph_def(graph_def, name="")
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 405, in import_graph_def
producer_op_list=producer_op_list)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 505, in _import_graph_def_internal
raise ValueError(str(e))
ValueError: Input 0 of node model/albert/embeddings/word_embeddings/embedding_lookup was passed float from model/albert/embeddings/word_embeddings/embedding_lookup/Read/ReadVariableOp/resource:0 incompatible with expected resource.
因此,我尝试将模型保存为 saved_model 格式并尝试使用以下代码进行转换:
# Second attempt: build the converter from a model exported in SavedModel
# format ('saved_model_path' is presumably a placeholder for the export dir).
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model_path')
# The conversion fails here too, with the ValueError in the traceback that
# follows.
tflite_model = converter.convert()
但是,再次出现相同的错误消息。
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\lite\python\lite.py", line 405, in convert
self._funcs[0], lower_control_flow=False)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 575, in convert_variables_to_constants_v2
converted_input_indices)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\convert_to_constants.py", line 371, in _construct_concrete_function
new_output_names)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 620, in function_from_graph_def
wrapped_import = wrap_function(_imports_graph_def, [])
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 598, in wrap_function
collections={}),
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 915, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 83, in __call__
return self.call_with_variable_creator_scope(self._fn)(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 89, in wrapped
return fn(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\eager\wrap_function.py", line 618, in _imports_graph_def
importer.import_graph_def(graph_def, name="")
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 405, in import_graph_def
producer_op_list=producer_op_list)
File "C:\Users\hygki\Anaconda3\lib\site-packages\tensorflow_core\python\framework\importer.py", line 505, in _import_graph_def_internal
raise ValueError(str(e))
ValueError: Input 0 of node StatefulPartitionedCall/model/albert/embeddings/word_embeddings/embedding_lookup was passed float from Func/StatefulPartitionedCall/input/_2:0 incompatible with expected resource.
所以我的理解是,embedding_lookup 收到了浮点数输入,而它期望的数据类型并不是浮点数。但是它期望的数据类型是什么?有什么办法可以查到吗?另外,这个问题有解决方法吗?
如果有人能帮助我将 albert_base 转换为 tflite 格式,我将不胜感激!
有趣的是,我已经为这个问题苦苦挣扎了几个小时,但在我上传问题后我就解决了问题...
所以解决方法是,使用tensorflow 1.15.0版本! 使用 tensorflow2 似乎会导致问题。
但是,我仍然无法将模型转换为 tflite,因为它还不支持 'IdentityN' 操作。我不认为我可以自己编写自定义操作,所以我认为我应该等待 tflite 更新....
关于 "IdentityN" 的错误,您尝试过使用 SELECT_TF_OPS 进行转换吗? https://www.tensorflow.org/lite/guide/ops_select
使用官方存储库中的 ALBERT 2.0 (TF 2.0) 模型,并将 https://github.com/google-research/ALBERT/blob/master/modeling.py#L516 处的代码改为 tf.gather(tf.identity(embedding_table), input_ids)。
然后像之前一样尝试用 tflite 进行转换。如果仍然不行,请在这里评论。