Apply landmark to MoViNet
def build_classifier(batch_size, num_frames, backbone, resolution, num_classes): # --- Video input --- video_input = layers.Input(shape=(num_frames, resolution, resolution, 3), batch_size=batch_size, name='video_input')
# Feature extraction from MoViNet backbone
def extract_video_features(x):
endpoints = backbone(x, training=False)
x = endpoints['head']
x = tf.squeeze(x, axis=[2, 3])
x = tf.keras.layers.GlobalAveragePooling1D()(x)
return x
video_features = layers.Lambda(
extract_video_features,
output_shape=(480,),
name="video_features"
)(video_input)
# --- Landmark input ---
landmark_input = layers.Input(shape=(num_frames, 234), batch_size=batch_size, name='landmark_input')
landmark_features = layers.Bidirectional(layers.LSTM(128, return_sequences=False))(landmark_input)
landmark_features = layers.Dense(128, activation='relu')(landmark_features)
# --- Fusion ---
merged = layers.Concatenate()([video_features, landmark_features]) # shape: (B, 608)
x = layers.Dense(256, activation='relu')(merged)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)
model = tf.keras.Model(inputs=[video_input, landmark_input], outputs=outputs)
return model
I build this model but when I run model.fit() it shows
TypeError: Exception encountered when calling Lambda.call().
tuple indices must be integers or slices, not str
Arguments received by Lambda.call(): • inputs=tf.Tensor(shape=(None, 50, 224, 224, 3), dtype=float32) • mask=None • training=True.
How can I fix this or any other ways to apply landmarks?