models
models copied to clipboard
[BUG] Batch Size results in different prediction outputs.
Bug description
Predictions from data loaders with different batch sizes yield different results.
Reproducing:
- simplistic two tower model
model = mm.TwoTowerModel(
schema,
query_tower=mm.MLPBlock([64,64],
no_activation_last_layer=True,
),
item_tower=mm.MLPBlock([64,64],
no_activation_last_layer=True,
))
model.compile(
optimizer='adam',
run_eagerly=False,
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, ),
metrics=[],
)
- I get the same prediction if running predict twice on the same data; however, the results differ if the batch sizes are different.
BATCH_SIZE_1 = 10240
data_loader1 = tf_dataloader.BatchedDataset(
Dataset( os.path.join(TRAINING_DIR, "valid_by_user/part_19.parquet"), part_size="100MB",schema=schema),
batch_size =BATCH_SIZE_1,
shuffle = False )
BATCH_SIZE_2 = 1024
data_loader2 = tf_dataloader.BatchedDataset(
Dataset( os.path.join(TRAINING_DIR, "valid_by_user/part_19.parquet"), part_size="100MB",schema=schema),
batch_size =BATCH_SIZE_2,
shuffle = False )
#test on the first batch from each data loader
data_loader1_batch = next(data_loader1)[0]
data_loader2_batch = next(data_loader2)[0]
# predict on same data twice and check if you get the same predictions on the same input (currently passing)
pred1 = model(data_loader1_batch)
pred1_again = model(data_loader1_batch)
assert np.all(pred1 == pred1_again)
pred2 = model(data_loader2_batch)
pred2_again = model(data_loader2_batch)
assert np.all(pred2 == pred2_again)
#check whether the predictions are the same across different batch sizes. (this is currently failing.)
assert np.all(pred1[:len(pred2)] == pred2)
Expected behavior
The assertions above should all pass; the predictions should be the same regardless of the batch size.
Environment details
- Merlin version:
merlin-core 0.5.0+1.g1354dcf
merlin-models 0.5.0+11.gd0ffe18d0
merlin-sok 1.1.3
merlin-systems 0.4.0+2.gf1e2bf5
tensorflow 2.9.1
- Platform: Ubuntu
Hi @angmc . I have tried to reproduce the issue using synthetic data (ecommerce_data). I implemented this unit test and it passes.
Could you maybe share your dataset?
@pytest.mark.parametrize("run_eagerly", [True, False])
def test_two_tower_retrieval_model_idempotence(ecommerce_data: Dataset, run_eagerly):
schema = ecommerce_data.schema.select_by_name(["user_categories", "item_id"])
model = mm.TwoTowerModel(
schema,
query_tower=mm.MLPBlock([64, 64], no_activation_last_layer=True,),
item_tower=mm.MLPBlock([64, 64], no_activation_last_layer=True,),
)
model.compile(
optimizer="adam",
run_eagerly=run_eagerly,
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True,),
metrics=[],
)
BATCH_SIZE_1 = 50
data_loader1 = BatchedDataset(ecommerce_data, batch_size=BATCH_SIZE_1, shuffle=False,)
BATCH_SIZE_2 = 20
data_loader2 = BatchedDataset(ecommerce_data, batch_size=BATCH_SIZE_2, shuffle=False,)
# test on the first batch from each data loader
data_loader1_batch = next(data_loader1)[0]
data_loader2_batch = next(data_loader2)[0]
# predict on same data twice and check if you get the same predictions on the same input (currently passing)
pred1 = model(data_loader1_batch)
pred1_again = model(data_loader1_batch)
tf.assert_equal(pred1, pred1_again)
pred2 = model(data_loader2_batch)
pred2_again = model(data_loader2_batch)
tf.assert_equal(pred2, pred2_again)
# check whether the predictions are the same across different batch sizes. (NOT FAILING WITH SYNTHETIC DATA.)
tf.assert_equal(pred1[: len(pred2)], pred2)
assert np.all(pred1[: len(pred2)] == pred2)