AssertionError: Tried to export a function which references untracked resource
After noticing that I was no longer able to save the model during training, I wrote a unit test to safeguard it.
@pytest.mark.usefixtures("maybe_run_functions_eagerly")
def test_save_model(speech_model: Tuple[TransducerBase, SpeechFeaturesConfig]):
    model, speech_features_config = speech_model
    speech_features_config: SpeechFeaturesConfig
    channels = 3 if speech_features_config.add_delta_deltas else 1
    num_mel_bins = speech_features_config.num_mel_bins
    enc_inputs = np.random.rand(1, 50, num_mel_bins, channels)
    dec_inputs = np.expand_dims(np.random.randint(0, 25, size=10), axis=1)
    inputs = enc_inputs, dec_inputs
    model(inputs)

    # Throws KeyError:
    # graph = tf.compat.v1.get_default_graph()
    # tensor = graph.get_tensor_by_name("77040:0")

    directory = tempfile.mkdtemp(prefix=f"{model.__class__.__name__}_")
    try:
        model.save(directory)
    finally:
        shutil.rmtree(directory)
Attempting to save the model always throws the following error:
E AssertionError: Tried to export a function which references untracked resource Tensor("77040:0", shape=(), dtype=resource). TensorFlow objects (e.g. tf.Variable) captured by functions must be tracked by assigning them to an attribute of a tracked object or assigned to an attribute of the main object directly.
E
E Trackable Python objects referring to this tensor (from gc.get_referrers, limited to two hops):
E <tf.Variable 'transformer_transducer/transducer_encoder/inputs_embedding/convolution_stack/conv2d/kernel:0' shape=(3, 3, 3, 32) dtype=float32>
Note: As you can see in the (commented-out) code above, I am not able to retrieve this tensor with tf.compat.v1.get_default_graph().get_tensor_by_name("77040:0").

I tried the following as well, but the result is always empty:
model(batch) # Build the model
tensor_name = "77040"
var_names = [var.name for var in model.trainable_weights]
weights = list(filter(lambda var: tensor_name in var, var_names))
var_names = [var.name for var in model.trainable_variables]
variables = list(filter(lambda var: tensor_name in var, var_names))
print(weights)
print(variables)
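A sanity check that may help here (a hedged sketch of mine, not from the original post): "77040:0" is a graph-internal resource handle name, so it can never appear in var.name. Searching for the human-readable variable path reported in the error output should at least confirm whether the weight itself is known to the model (this assumes the built model from the test above):

# Sketch: search by the Keras variable name from the error message instead of
# the internal resource name "77040:0", which is not a weight name and will
# therefore never match anything in model.weights.
target = "inputs_embedding/convolution_stack/conv2d/kernel"
matches = [v for v in model.weights if target in v.name]
print(matches)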
The problem is that I do not understand why I am getting this error in the first place, because the affected layer is tracked by Keras, as the screenshot showed; I took it during a debugging session inside the call() function.

I have no explanation for this and I am running out of ideas what the issue might be here.

The transformations list in the screenshot is an attribute of the layer InputsEmbedding and holds its constructed sub-layers, as shown below:
class InputsEmbedding(layers.Layer, TimeReduction):
    def __init__(self, config: InputsEmbeddingConfig, **kwargs):
        super().__init__(**kwargs)

        if config.transformations is None or not len(config.transformations):
            raise RuntimeError("No transformations provided.")

        self.config = config

        self.transformations = list()
        for transformation in self.config.transformations:
            layer_name, layer_params = list(transformation.items())[0]
            layer = _get_layer(layer_name, layer_params)
            self.transformations.append(layer)

        self.init_time_reduction_layer()

    def get_config(self):
        return self.config.dict()


def _get_layer(name: str, params: dict) -> layers.Layer:
    if name == "conv2d_stack":
        return ConvolutionStack(**params)
    elif name == "stack_frames":
        return StackFrames(**params)
    else:
        raise RuntimeError(f"Unsupported or unknown time-reduction layer {name}")
To verify that the problem is not InputsEmbedding itself, I created a unit test that saves a model which uses only that particular layer.
@pytest.mark.usefixtures("maybe_run_functions_eagerly")
def test_inputs_embedding_save_model():
    convolutions = [
        "filters=2, kernel_size=(3, 3), strides=(2, 1)",
        "filters=4, kernel_size=(3, 3), strides=(2, 1)",
        "filters=8, kernel_size=(3, 4), strides=(1, 1)",
    ]

    config = InputsEmbeddingConfig()
    config.transformations = [dict(conv2d_stack=dict(convolutions=convolutions)), dict(stack_frames=dict(n=2))]

    num_features = 8
    num_channels = 3
    inputs = layers.Input(shape=(None, num_features, num_channels))
    x = inputs
    x, _ = InputsEmbedding(config)(x)
    model = keras.Model(inputs=inputs, outputs=x)
    model.build(input_shape=(1, 20, num_features, num_channels))

    directory = tempfile.mkdtemp(prefix=f"{model.__class__.__name__}_")
    try:
        model.save(directory)
    finally:
        shutil.rmtree(directory)
Here I am able to save this layer without any problems.

ConvolutionStack

Since it seems to be relevant, here is the (rather ugly) implementation of ConvolutionStack:
from typing import List

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.keras.layers import convolutional

from speech.lab.layers import InputsRequirements
from speech.lab.models import conv_util, models_util


class ConvolutionStack(layers.Layer):
    def __init__(
        self,
        convolutions: List[str],
        kernel_regularizer: dict = None,
        bias_regularizer: dict = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.config = dict(
            convolutions=convolutions,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer
        )
        self.conv_stack_config = [eval(f"dict({convolution})") for convolution in convolutions]
        self.conv_blocks = list()

        if kernel_regularizer is not None:
            kernel_regularizer = models_util.maybe_to_regularizer(kernel_regularizer)
        if bias_regularizer is not None:
            bias_regularizer = models_util.maybe_to_regularizer(bias_regularizer)

        for block_config in self.conv_stack_config:
            block = _new_convolution_block(
                **block_config,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
            )
            self.conv_blocks.append(block)

        self.drop_dim2 = layers.Lambda(tf.squeeze, arguments=dict(axis=-2))
        self.expand_last = layers.Lambda(tf.expand_dims, arguments=dict(axis=-1))

    @property
    def inputs_requirements(self) -> InputsRequirements:
        requirements, frame_look_back = conv_util.get_conv2d_stack_requirements(self.conv_stack_config)
        first = requirements[0]
        t_min, f_size = first["min_size"]
        t_grow, f_grow = first["grow_size"]
        return InputsRequirements(
            frame_look_back=frame_look_back,
            t_min=t_min,
            t_grow=t_grow,
            f_min=f_size,
            f_grow=f_grow,
        )

    def call(self, inputs, training=None, mask=None, **kwargs):
        """
        :param inputs:
            Tensor taking the form [batch, time, freq, channel]
        :param training:
        :param mask:
        :param kwargs:
        :return:
            Tensor taking the form [batch, time, freq, 1]
        """
        if training:
            t_min = self.inputs_requirements.t_min
            t_grow = self.inputs_requirements.t_grow
            pad = conv_util.get_padding_for_loss(tf.shape(inputs)[1], t_min=t_min, t_grow=t_grow)
            inputs = tf.pad(inputs, ((0, 0), (0, pad), (0, 0), (0, 0)))
            if mask is not None:
                mask = tf.pad(mask, ((0, 0), (0, pad)))

        f_min = self.inputs_requirements.f_min
        f_grow = self.inputs_requirements.f_grow
        assert (inputs.shape[2] - f_min) % f_grow == 0, (
            f'Inputs dimension "freq" ' f"expected to be {f_min} + n * {f_grow} but got {inputs.shape[2]} instead."
        )

        x = inputs
        for block in self.conv_blocks:
            for layer in block:
                if mask is not None and isinstance(layer, convolutional.Conv):
                    st, _ = layer.strides
                    kt = tf.maximum(layer.kernel_size[0] - 1, 1)
                    mask = mask[:, :-kt][:, ::st]
                    mask = tf.pad(mask, ((0, 0), (0, tf.maximum(2 - layer.kernel_size[0], 0))))
                x = layer(x, training=training)

        return self.expand_last(self.drop_dim2(x)), mask

    def get_config(self):
        return self.config


def _new_convolution_block(
    filters: int,
    kernel_size: tuple,
    strides: tuple,
    use_bias: bool = False,
    use_norm: bool = True,
    kernel_regularizer=None,
    bias_regularizer=None,
    activation=None,
):
    assert strides[0] % 2 == 0 or strides[0] == 1, "Strides on the time axis must be divisible by 2 or be exactly 1."

    if activation is not None:
        activation_layer = layers.Activation(activation)
    else:
        activation_layer = layers.Lambda(lambda x: x)

    if use_norm:
        norm_layer = layers.LayerNormalization()
    else:
        norm_layer = layers.Lambda(lambda x: x)

    return (
        layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            use_bias=use_bias,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
        ),
        norm_layer,
        activation_layer,
    )
Your problem is not related to 'transformer_transducer/transducer_encoder/inputs_embedding/convolution_stack/conv2d/kernel:0'.

The error message tells you that a function references an untracked resource; apparently the untracked object was not assigned directly as an attribute of anything reachable from this conv2d/kernel:0.

To solve your problem, we need to localize Tensor("77040:0", shape=(), dtype=resource) from this error message:

AssertionError: Tried to export a function which references untracked resource\
Tensor("77040:0", shape=(), dtype=resource).
TensorFlow objects (e.g. tf.Variable) captured by functions must be tracked by assigning them to an attribute of a tracked object or assigned to an attribute of the main object directly.
Edit:

Thanks to your comments, we found out that "ConvolutionStack" seems to reproduce the error:

The problem only occurs if I use the ConvolutionStack layer in InputsEmbedding but I can save both of them successfully in a standalone model.

I understand that you cannot share the configuration of that layer, which is why I suggest you try to localize this Tensor("77040:0") from within ConvolutionStack.

This untracked tensor must be an artifact or a temporary tensor created by some process inside the ConvolutionStack functions.

Try to find a tensor that is passed from one function to another instead of being assigned as an attribute of the layer class.
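As a starting point (a hedged sketch added here, not part of the original answer), you can enumerate everything the model actually tracks; a variable that shows up in the error output but is missing from this listing is the resource being captured without being tracked:

# Sketch: list every variable Keras/TF tracks on the model. submodules walks
# all nested layers, so a variable that a traced function references but that
# never appears in this output is the untracked resource.
for module in model.submodules:
    for var in module.variables:
        print(module.name, var.name, var.shape)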
Using:
- tensorflow v2.5.0
- Python: 3.9
The problem seems to occur when we declare/define layers as class variables. I can only assume that the issue has to do with Keras's internal tracking logic, which may well make sense, but I think this is not obvious to users, and I don't think I have ever seen a hint pointing out that this could be a problem.

So, in my project, I had the following:
class Model(keras.Model):

    inputs_embedding: InputsEmbedding = None  # <-- This caused the problem

    def __init__(self, config, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if config.embeddings is not None:
            self.inputs_embedding = InputsEmbedding(config.embeddings)
        # ...
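For completeness, a hedged sketch of the fix that follows from the ModelD example further below: keep the type annotation (or drop the class-level declaration entirely), but do not give it a class-level "= None" default, so the layer only ever exists as an instance attribute:

class Model(keras.Model):

    # Annotation only (as in ModelD below); no class-level "= None" default,
    # which is what appears to break Keras' attribute tracking on export.
    inputs_embedding: InputsEmbedding

    def __init__(self, config, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if config.embeddings is not None:
            self.inputs_embedding = InputsEmbedding(config.embeddings)
        # ...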
MVP example

The following example creates instances of ModelA, ModelB, ModelC, and ModelD. Models A and B can be saved, but C cannot. As far as I can tell, declaring a layer with trainable weights as a class variable simply does not work. It does, however, seem to work for layers without trainable weights (see ModelB).

Note also that ModelD can be saved. The difference to ModelB is that the layer is only declared (annotated) but not defined as None, which begs the question why ModelC does not work then.
Source code
import tempfile

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers


class ModelA(tf.keras.Model):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.model_layer = layers.LayerNormalization()

    def call(self, inputs, training=None, mask=None):
        return self.model_layer(inputs)

    def get_config(self):
        return dict()


class ModelB(tf.keras.Model):
    model_layer: layers.Layer = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # This is probably working because layers.Lambda has no trainable variables
        self.model_layer = layers.Lambda(lambda x: x)

    def call(self, inputs, training=None, mask=None):
        return self.model_layer(inputs)

    def get_config(self):
        return dict()


class ModelC(tf.keras.Model):
    model_layer: layers.Layer = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.model_layer = layers.LayerNormalization()

    def call(self, inputs, training=None, mask=None):
        return self.model_layer(inputs)

    def get_config(self):
        return dict()


class ModelD(tf.keras.Model):
    model_layer: layers.Layer

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.model_layer = layers.LayerNormalization()

    def call(self, inputs, training=None, mask=None):
        return self.model_layer(inputs)

    def get_config(self):
        return dict()


def save_tmp_model(model: tf.keras.Model):
    name = model.__class__.__name__
    print(f'Saving model {name}')
    try:
        model.save(tempfile.mkdtemp(prefix=f"{name}_"))
    except Exception as e:
        print(f"Unable to save model: {name}")
        print('Error message:')
        print(str(e))
        return
    print(f".. success!")


def main():
    inputs = np.random.rand(1, 50, 16)

    model_a = ModelA()
    model_b = ModelB()
    model_c = ModelC()
    model_d = ModelD()

    # Build models
    model_a(inputs)
    model_b(inputs)
    model_c(inputs)
    model_d(inputs)

    # Save models
    save_tmp_model(model_a)
    save_tmp_model(model_b)
    save_tmp_model(model_c)
    save_tmp_model(model_d)


if __name__ == '__main__':
    main()
Output
Saving model ModelA
.. success!
Saving model ModelB
.. success!
Saving model ModelC
Unable to save model: ModelC
Error message:
Tried to export a function which references untracked resource Tensor("1198:0", shape=(), dtype=resource). TensorFlow objects (e.g. tf.Variable) captured by functions must be tracked by assigning them to an attribute of a tracked object or assigned to an attribute of the main object directly.
Trackable Python objects referring to this tensor (from gc.get_referrers, limited to two hops):
<tf.Variable 'model_c/layer_normalization_1/gamma:0' shape=(16,) dtype=float32>
Saving model ModelD
.. success!
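A quick follow-up check (a hedged sketch; the expectation stated in the comments is my assumption, not something verified in the post): if the class-level "= None" default really does bypass Keras' attribute tracking, the LayerNormalization weights should be missing from model_c.weights while being present in model_d.weights:

# Sketch: compare what the two models report as tracked weights. The
# hypothesis is that model_c is missing the gamma/beta variables of its
# LayerNormalization, while model_d lists them.
print([v.name for v in model_c.weights])
print([v.name for v in model_d.weights])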