Tensorflow: ValueError: Input 0 is incompatible with layer model: expected shape=(None, 99), found shape=(None, 3)

Question

我正在尝试使用 Tensorflow 中制作的 ANN 分类模型进行预测，以使用 MediaPipe 对姿势关键点进行分类。 mediapipe 姿态跟踪器有 33 个 x y 和 z 坐标关键点，总共 99 个数据点。

我正在训练 4 类。

这是运行嵌入姿势

import mediapipe as mp
import numpy as np
import tensorflow as tf
from tensorflow import keras
mp_pose = mp.solutions.pose


def get_center_point(landmarks, left_bodypart, right_bodypart):
  """Calculates the center point of the two given landmarks."""

  left = tf.gather(landmarks, left_bodypart.value, axis=1)
  right = tf.gather(landmarks, right_bodypart.value, axis=1)
  center = left * 0.5 + right * 0.5
  return center


def get_pose_size(landmarks, torso_size_multiplier=2.5):
  """Calculates pose size.

  It is the maximum of two values:
    * Torso size multiplied by `torso_size_multiplier`
    * Maximum distance from pose center to any pose landmark
  """
  # Hips center
  hips_center = get_center_point(landmarks, mp_pose.PoseLandmark.LEFT_HIP, 
                                 mp_pose.PoseLandmark.RIGHT_HIP)

  # Shoulders center
  shoulders_center = get_center_point(landmarks,mp_pose.PoseLandmark.LEFT_SHOULDER,
                                      mp_pose.PoseLandmark.RIGHT_SHOULDER)

  # Torso size as the minimum body size
  torso_size = tf.linalg.norm(shoulders_center - hips_center)

  # Pose center
  pose_center_new = get_center_point(landmarks,mp_pose.PoseLandmark.LEFT_HIP, 
                                     mp_pose.PoseLandmark.RIGHT_HIP)
  pose_center_new = tf.expand_dims(pose_center_new, axis=1)
  # Broadcast the pose center to the same size as the landmark vector to
  # perform substraction
  pose_center_new = tf.broadcast_to(pose_center_new,
                                    [tf.size(landmarks) // (33*3), 33, 3])

  # Dist to pose center
  d = tf.gather(landmarks - pose_center_new, 0, axis=0,
                name="dist_to_pose_center")
  # Max dist to pose center
  max_dist = tf.reduce_max(tf.linalg.norm(d, axis=0))

  # Normalize scale
  pose_size = tf.maximum(torso_size * torso_size_multiplier, max_dist)

  return pose_size


def normalize_pose_landmarks(landmarks):
  """Normalizes the landmarks translation by moving the pose center to (0,0) and
  scaling it to a constant pose size.
  """
  # Move landmarks so that the pose center becomes (0,0)
  pose_center = get_center_point(landmarks, mp_pose.PoseLandmark.LEFT_HIP, 
                                 mp_pose.PoseLandmark.RIGHT_HIP)
  pose_center = tf.expand_dims(pose_center, axis=1)
  # Broadcast the pose center to the same size as the landmark vector to perform
  # substraction
  pose_center = tf.broadcast_to(pose_center, 
                                [tf.size(landmarks) // (33*3), 33, 3])
  landmarks = landmarks - pose_center

  # Scale the landmarks to a constant pose size
  pose_size = get_pose_size(landmarks)
  landmarks /= pose_size

  return landmarks


def landmarks_to_embedding(landmarks_and_scores):
  """Converts the input landmarks into a pose embedding."""
  # Reshape the flat input into a matrix with shape=(33, 3)
  reshaped_inputs = keras.layers.Reshape((33, 3))(landmarks_and_scores)

  # Normalize landmarks 3D
  landmarks = normalize_pose_landmarks(reshaped_inputs[:, :, :3])

  # Flatten the normalized landmark coordinates into a vector
  embedding = keras.layers.Flatten()(landmarks)

  return embedding

然后我创建模型并将嵌入输入提供给它

import csv
import cv2
import itertools
import numpy as np
import pandas as pd
import os
import sys
import tempfile
import tqdm
import mediapipe as mp
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from poseEmbedding import get_center_point, get_pose_size, normalize_pose_landmarks, landmarks_to_embedding


def load_pose_landmarks(csv_path):
    #load CSV file
    dataframe = pd.read_csv(csv_path)
    df_to_process = dataframe.copy()
    
    #extract the list of class names
    classes = df_to_process.pop('class_name').unique()
    
    #extract the labels
    y = df_to_process.pop('class_no')
    
    #convert the input features and labels into float64 format for training
    X = df_to_process.astype('float64')
    y = keras.utils.to_categorical(y)
    
    return X,y, classes, dataframe
csvs_out_train_path = 'train_data.csv'
csvs_out_test_path = 'test_data.csv'

#Load training data

X, y, class_names, _ = load_pose_landmarks(csvs_out_train_path)

#split training data(X,y) into (X_train, y_train) and (X_val, y_val)
X_train, X_val, y_train, y_val = train_test_split(X,y, test_size=0.15)

X_test, y_test, _, df_test = load_pose_landmarks(csvs_out_test_path)

mp_pose = mp.solutions.pose

inputs = tf.keras.Input(shape=(99))
embedding = landmarks_to_embedding(inputs)

layer = keras.layers.Dense(128, activation=tf.nn.relu6)(embedding)
layer = keras.layers.Dropout(0.5)(layer)
layer = keras.layers.Dense(64, activation=tf.nn.relu6)(layer)
layer = keras.layers.Dropout(0.5)(layer)
outputs = keras.layers.Dense(4, activation="softmax")(layer)

model = keras.Model(inputs, outputs)
#model.summary()


model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics=['accuracy']
)




# Start training
history = model.fit(X_train, y_train,
                    epochs=200,
                    batch_size=16,
                    validation_data=(X_val, y_val))
model.save("complete_epoch_model")
                    
# Visualize the training history to see whether you're overfitting.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['TRAIN', 'VAL'], loc='lower right')
plt.show()
loss, accuracy = model.evaluate(X_test, y_test)

模型摘要打印出来：

 Layer (type)                   Output Shape         Param #     Connected to                     
==================================================================================================
 input_18 (InputLayer)          [(None, 99)]         0           []                               
                                                                                                  
 reshape_17 (Reshape)           (None, 33, 3)        0           ['input_18[0][0]']               
                                                                                                  
 tf.__operators__.getitem_10 (S  (None, 33, 3)       0           ['reshape_17[0][0]']             
 licingOpLambda)                                                                                  
                                                                                                  
 tf.compat.v1.gather_69 (TFOpLa  (None, 3)           0           ['tf.__operators__.getitem_10[0][
 mbda)                                                           0]']                             
                                                                                                  
 tf.compat.v1.gather_70 (TFOpLa  (None, 3)           0           ['tf.__operators__.getitem_10[0][
 mbda)                                                           0]']                             
                                                                                                  
 tf.math.multiply_69 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_69[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.math.multiply_70 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_70[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.__operators__.add_31 (TFOpL  (None, 3)           0           ['tf.math.multiply_69[0][0]',    
 ambda)                                                           'tf.math.multiply_70[0][0]']    
                                                                                                  
 tf.compat.v1.size_17 (TFOpLamb  ()                  0           ['tf.__operators__.getitem_10[0][
 da)                                                             0]']                             
                                                                                                  
 tf.expand_dims_17 (TFOpLambda)  (None, 1, 3)        0           ['tf.__operators__.add_31[0][0]']
                                                                                                  
 tf.compat.v1.floor_div_17 (TFO  ()                  0           ['tf.compat.v1.size_17[0][0]']   
 pLambda)                                                                                         
                                                                                                  
 tf.broadcast_to_17 (TFOpLambda  (None, 33, 3)       0           ['tf.expand_dims_17[0][0]',      
 )                                                                'tf.compat.v1.floor_div_17[0][0]
                                                                 ']                               
                                                                                                  
 tf.math.subtract_23 (TFOpLambd  (None, 33, 3)       0           ['tf.__operators__.getitem_10[0][
 a)                                                              0]',                             
                                                                  'tf.broadcast_to_17[0][0]']     
                                                                                                  
 tf.compat.v1.gather_75 (TFOpLa  (None, 3)           0           ['tf.math.subtract_23[0][0]']    
 mbda)                                                                                            
                                                                                                  
 tf.compat.v1.gather_76 (TFOpLa  (None, 3)           0           ['tf.math.subtract_23[0][0]']    
 mbda)                                                                                            
                                                                                                  
 tf.math.multiply_75 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_75[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.math.multiply_76 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_76[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.__operators__.add_34 (TFOpL  (None, 3)           0           ['tf.math.multiply_75[0][0]',    
 ambda)                                                           'tf.math.multiply_76[0][0]']    
                                                                                                  
 tf.compat.v1.size_18 (TFOpLamb  ()                  0           ['tf.math.subtract_23[0][0]']    
 da)                                                                                              
                                                                                                  
 tf.compat.v1.gather_73 (TFOpLa  (None, 3)           0           ['tf.math.subtract_23[0][0]']    
 mbda)                                                                                            
                                                                                                  
 tf.compat.v1.gather_74 (TFOpLa  (None, 3)           0           ['tf.math.subtract_23[0][0]']    
 mbda)                                                                                            
                                                                                                  
 tf.compat.v1.gather_71 (TFOpLa  (None, 3)           0           ['tf.math.subtract_23[0][0]']    
 mbda)                                                                                            
                                                                                                  
 tf.compat.v1.gather_72 (TFOpLa  (None, 3)           0           ['tf.math.subtract_23[0][0]']    
 mbda)                                                                                            
                                                                                                  
 tf.expand_dims_18 (TFOpLambda)  (None, 1, 3)        0           ['tf.__operators__.add_34[0][0]']
                                                                                                  
 tf.compat.v1.floor_div_18 (TFO  ()                  0           ['tf.compat.v1.size_18[0][0]']   
 pLambda)                                                                                         
                                                                                                  
 tf.math.multiply_73 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_73[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.math.multiply_74 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_74[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.math.multiply_71 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_71[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.math.multiply_72 (TFOpLambd  (None, 3)           0           ['tf.compat.v1.gather_72[0][0]'] 
 a)                                                                                               
                                                                                                  
 tf.broadcast_to_18 (TFOpLambda  (None, 33, 3)       0           ['tf.expand_dims_18[0][0]',      
 )                                                                'tf.compat.v1.floor_div_18[0][0]
                                                                 ']                               
                                                                                                  
 tf.__operators__.add_33 (TFOpL  (None, 3)           0           ['tf.math.multiply_73[0][0]',    
 ambda)                                                           'tf.math.multiply_74[0][0]']    
                                                                                                  
 tf.__operators__.add_32 (TFOpL  (None, 3)           0           ['tf.math.multiply_71[0][0]',    
 ambda)                                                           'tf.math.multiply_72[0][0]']    
                                                                                                  
 tf.math.subtract_25 (TFOpLambd  (None, 33, 3)       0           ['tf.math.subtract_23[0][0]',    
 a)                                                               'tf.broadcast_to_18[0][0]']     
                                                                                                  
 tf.math.subtract_24 (TFOpLambd  (None, 3)           0           ['tf.__operators__.add_33[0][0]',
 a)                                                               'tf.__operators__.add_32[0][0]']
                                                                                                  
 tf.compat.v1.gather_77 (TFOpLa  (33, 3)             0           ['tf.math.subtract_25[0][0]']    
 mbda)                                                                                            
                                                                                                  
 tf.compat.v1.norm_14 (TFOpLamb  ()                  0           ['tf.math.subtract_24[0][0]']    
 da)                                                                                              
                                                                                                  
 tf.compat.v1.norm_15 (TFOpLamb  (3,)                0           ['tf.compat.v1.gather_77[0][0]'] 
 da)                                                                                              
                                                                                                  
 tf.math.multiply_77 (TFOpLambd  ()                  0           ['tf.compat.v1.norm_14[0][0]']   
 a)                                                                                               
                                                                                                  
 tf.math.reduce_max_7 (TFOpLamb  ()                  0           ['tf.compat.v1.norm_15[0][0]']   
 da)                                                                                              
                                                                                                  
 tf.math.maximum_7 (TFOpLambda)  ()                  0           ['tf.math.multiply_77[0][0]',    
                                                                  'tf.math.reduce_max_7[0][0]']   
                                                                                                  
 tf.math.truediv_7 (TFOpLambda)  (None, 33, 3)       0           ['tf.math.subtract_23[0][0]',    
                                                                  'tf.math.maximum_7[0][0]']      
                                                                                                  
 flatten_7 (Flatten)            (None, 99)           0           ['tf.math.truediv_7[0][0]']      
                                                                                                  
 dense_21 (Dense)               (None, 128)          12800       ['flatten_7[0][0]']              
                                                                                                  
 dropout_14 (Dropout)           (None, 128)          0           ['dense_21[0][0]']               
                                                                                                  
 dense_22 (Dense)               (None, 64)           8256        ['dropout_14[0][0]']             
                                                                                                  
 dropout_15 (Dropout)           (None, 64)           0           ['dense_22[0][0]']               
                                                                                                  
 dense_23 (Dense)               (None, 4)            260         ['dropout_15[0][0]']             
                                                                                                  
==================================================================================================
Total params: 21,316
Trainable params: 21,316
Non-trainable params: 0
__________________________________________________________________________________________________

现在，当我尝试运行在我的网络摄像头上进行推理时，我从 mediapipe 和 Tensorflow 收到以下错误：

ValueError: Input 0 is incompatible with layer model: expected shape=(None, 99), found shape=(None, 3)

我不确定如何修复此错误，因为我只能使用 99 的形状进行训练，因为 TF 在尝试编译时给出了使用 3 的形状的错误。我该如何解决这个问题？

这是我的推理代码：

import cv2
import os
import tqdm
import numpy as np
import logging
from mediapipe.python.solutions import pose as mp_pose
from mediapipe.python.solutions import drawing_utils as mp_drawing
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.utils import CustomObjectScope


def relu6(x):
  return K.relu(x, max_value=6)

logging.getLogger().setLevel(logging.CRITICAL)



cap = cv2.VideoCapture(0)

model = tf.keras.models.load_model('weights_best.hdf5', compile = True,
        custom_objects = {"relu6": relu6})


with mp_pose.Pose() as pose_tracker:
  while cap.isOpened():
    # Get next frame of the video.
    ret, frame = cap.read()


    # Run pose tracker.
    imagefirst = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = cv2.flip(imagefirst,1)

    result = pose_tracker.process(image)
    pose_landmarks = result.pose_landmarks

    # Draw pose prediction.
    if pose_landmarks is not None:
      mp_drawing.draw_landmarks(
          image,
          landmark_list=pose_landmarks,
          connections=mp_pose.POSE_CONNECTIONS)

    if pose_landmarks is not None:
      # Get landmarks.
      frame_height, frame_width = frame.shape[0], frame.shape[1]
      pose_landmarks = np.array([[lmk.x * frame_width, lmk.y * frame_height, lmk.z * frame_width]
                                 for lmk in pose_landmarks.landmark], dtype=np.float32)
      assert pose_landmarks.shape == (33, 3), 'Unexpected landmarks shape: {}'.format(pose_landmarks.shape)
      prediction = model.predict(pose_landmarks)



    # Save the output frame.
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    cv2.imshow('Raw Webcam Feed', image)
    if cv2.waitKey(10) & 0xFF == ord('q'):
      break

# Close output video.
cap.release()
cv2.destroyAllWindows()

# Release MediaPipe resources.
pose_tracker.close()

Answer 1

也许可以尝试在断言之后和做出预测之前将 pose_landmarks 的形状从 (33, 3) 更改为 (1, 99)：

import tensorflow as tf

pose_landmarks = tf.random.normal((33, 3))
assert pose_landmarks.shape == (33, 3), 'Unexpected landmarks shape: {}'.format(pose_landmarks.shape)

pose_landmarks = tf.expand_dims(pose_landmarks, axis=0)
shape = tf.shape(pose_landmarks)
pose_landmarks = tf.reshape(pose_landmarks, (shape[0], shape[1] * shape[2]))

tf.print(pose_landmarks.shape)

TensorShape([1, 99])

Tensorflow: ValueError: Input 0 is incompatible with layer model: expected shape=(None, 99), found shape=(None, 3)

Tensorflow: ValueError: Input 0 is incompatible with layer model: expected shape=(None, 99), found shape=(None, 3)

python

opencv

neural-network

keras

tensorflow