weaviate-python-client
weaviate-python-client copied to clipboard
BUG: Broken references silently failing using client.batch (v4)
It is great that references can be added to objects created within the same batch.
However, failed references should be listed in client.batch.failed_references.
repro:
import weaviate
import weaviate.classes as wvc
import weaviate.util
from tqdm import tqdm
from weaviate.util import generate_uuid5
print("Weaviate version:", weaviate.__version__)

# Step 1: reset the schema. Collections "A" and "B" are dropped and recreated,
# each with one text property and one cross-reference pointing at the other.
# No explicit client.close() is needed: leaving the `with` block closes it.
with weaviate.connect_to_local() as client:
    print(client.get_meta())
    client.collections.delete(["A", "B"])
    client.collections.create("A")
    client.collections.create("B")
    a_collection = client.collections.get("A")
    b_collection = client.collections.get("B")
    a_collection.config.add_property(
        wvc.config.Property(name="a_name", data_type=wvc.config.DataType.TEXT)
    )
    a_collection.config.add_reference(
        wvc.config.ReferenceProperty(name="b_ref", target_collection="B")
    )
    b_collection.config.add_property(
        wvc.config.Property(name="b_name", data_type=wvc.config.DataType.TEXT)
    )
    b_collection.config.add_reference(
        wvc.config.ReferenceProperty(name="a_ref", target_collection="A")
    )
    print(
        {
            "A": a_collection.aggregate.over_all(),
            "B": b_collection.aggregate.over_all(),
        }
    )
    print("-" * 80)

# Step 2: batch-insert 5000 A/B pairs. Each B gets a valid reference to its A;
# each A gets a reference to a UUID that is never inserted (a broken reference).
with weaviate.connect_to_local() as client:
    with client.batch.dynamic() as batch:
        for i in tqdm(range(5000)):
            a_uuid = generate_uuid5(str(i), "A")
            b_uuid = generate_uuid5(str(i), "B")
            # Deterministic UUID for which no object is ever added.
            inexistent_uuid = generate_uuid5(str(i), "inexistent")
            batch.add_object(properties={"a_name": "test"}, collection="A", uuid=a_uuid)
            batch.add_object(properties={"b_name": "test"}, collection="B", uuid=b_uuid)
            batch.add_reference(
                from_uuid=b_uuid, from_collection="B", from_property="a_ref", to=a_uuid
            )
            batch.add_reference(
                from_uuid=a_uuid,
                from_collection="A",
                from_property="b_ref",
                to=inexistent_uuid,
            )

    # The failure lists are inspected only after the batch context has exited.
    print(">>> Done inserting objects")
    if client.batch.failed_objects:
        print("Failed objects:", client.batch.failed_objects[:10])
    else:
        print("No failed objects")

    # The reporter expects the 5000 broken A -> "inexistent" references here.
    if client.batch.failed_references:
        print("Failed references:", client.batch.failed_references[:10])
    else:
        print("No failed references")

# Step 3: double check all B objects have a reference to an A object, and that
# none of the broken references on A objects were stored.
with weaviate.connect_to_local() as client:
    b_collection = client.collections.get("B")
    for b_obj in b_collection.iterator(
        return_references=[wvc.query.QueryReference(link_on="a_ref")]
    ):
        assert len(b_obj.references) == 1
    print(">>> All B objects have a reference to an A object")

    a_collection = client.collections.get("A")
    for a_obj in a_collection.iterator(
        return_references=[wvc.query.QueryReference(link_on="b_ref")]
    ):
        assert len(a_obj.references) == 0
    print(">>> All A objects have no references")

# Step 4: print the final object counts of both collections.
with weaviate.connect_to_local() as client:
    a_collection = client.collections.get("A")
    b_collection = client.collections.get("B")
    print("-" * 80)
    print(
        {
            "A": a_collection.aggregate.over_all(),
            "B": b_collection.aggregate.over_all(),
        }
    )
Output:
Weaviate version: 4.5.5
{'hostname': 'http://127.0.0.1:8080/', 'modules': {}, 'version': '1.23.7'}
{'A': AggregateReturn(properties={}, total_count=0), 'B': AggregateReturn(properties={}, total_count=0)}
--------------------------------------------------------------------------------
100%|██████████| 5000/5000 [00:01<00:00, 2547.51it/s]
>>> Done inserting objects
No failed objects
No failed references
>>> All B objects have a reference to an A object
>>> All A objects have no references
--------------------------------------------------------------------------------
{'A': AggregateReturn(properties={}, total_count=5000), 'B': AggregateReturn(properties={}, total_count=5000)}
Hi @glesperance, this one is actually a quirk of Weaviate's core logic when it comes to adding references. The design decision was made that adding a reference (edge) to/from an object (node) that doesn't exist is not treated as an actual error; instead, the reference is simply not created and nothing is reported as failed.
If you feel this is unexpected behaviour and that references should error if referring to non-existent objects then I encourage you to open a PR on the Weaviate core repo!
Will close this as there is nothing we can do on the client side, but please open an issue in the Weaviate repo if you expect a different behaviour.
We guarantee that references to objects that have been added to the batch before the reference are properly resolved. So if you do:
# Schematic sketch only: `collection`, `uuid1` and `uuid2` are placeholders and
# `whatever()` stands for any of the batch modes; other arguments are omitted.
with collection.batch.whatever() as batch:
    batch.add_object(uuid1)
    batch.add_object(uuid2)
    # `from` is a reserved word in Python, so the keyword is `from_uuid`.
    batch.add_reference(from_uuid=uuid1, to=uuid2)
The reference will only be added after both objects have been successfully ingested by Weaviate.