objectbox-python icon indicating copy to clipboard operation
objectbox-python copied to clipboard

Seeking advice: remove is painfully slow

Open patknight opened this issue 11 months ago • 0 comments

I'm finding that adding to and searching an ObjectBox database is really fast. However, the remove operation is really slow (about 1 second per object). The database is on a local NVMe SSD. It contains about 20,000 hashes and takes up about 6 GB.

The hash_box.query lookup inside my find_unique function is fast - it's literally the call to hash_box.remove that takes the time.

What am I doing wrong?

@Entity()
class ImHash:
    """Stored record pairing a string key with an image embedding vector."""
    # Object ID property of the entity.
    id = Id
    # Lookup key; declared with a HASH index and a uniqueness constraint.
    key = String(index=Index(IndexType.HASH), unique=True)
    # Embedding vector, HNSW-indexed with cosine distance.
    # NOTE(review): 62720 is presumably the length of the flattened feature
    # map produced by hash_image() - confirm against the model in use.
    cos_value = Float32Vector(index=HnswIndex(
        dimensions=62720,
        distance_type=VectorDistanceType.COSINE,
    ))


def hash_image(im: Image.Image) -> list[float]:
    """Return the flattened feature vector of *im*.

    The image is passed to the module-level ``img2vec`` extractor with
    ``tensor=True``; the result is detached, moved to the CPU, converted
    with ``.numpy()`` and flattened to one dimension.

    NOTE(review): the returned value is whatever ``.numpy().flatten()``
    yields (presumably a NumPy array rather than a plain ``list``) -
    confirm whether the return annotation should be adjusted.
    """
    features = img2vec.get_vec(im, tensor=True)
    cpu_features = features.detach().cpu()
    return cpu_features.numpy().flatten()


def hash_and_store(name_or_fp, key: str):
    """Hash an image and insert or update the record stored under *key*.

    Args:
        name_or_fp: Anything accepted by ``Image.open`` (the name suggests
            a file name or an open file object).
        key: Unique key of the record to create or update.

    If ``find_unique(key)`` returns ``None`` a new ``ImHash`` is created
    with that key; otherwise the existing record's vector is overwritten.
    """
    # Use the image as a context manager so the file handle opened by
    # Image.open is released as soon as the embedding has been computed.
    with Image.open(name_or_fp) as im:
        h = hash_image(im)
    ih = find_unique(key)
    if ih is None:
        # No record for this key yet: create one.
        ih = ImHash()
        ih.key = key
    ih.cos_value = h
    with store_lock:
        hash_box.put(ih)

def init(db_dir: pathlib.Path):
    """Open the store under *db_dir* and set up the module-level globals.

    Rebinds ``store``, ``hash_box`` and ``img2vec``. The store directory
    and model JSON file are resolved relative to *db_dir* using the
    module-level ``directory_name`` and ``json_model_name``.
    """
    global store, hash_box, img2vec
    store_dir = str(db_dir / directory_name)
    model_file = str(db_dir / json_model_name)
    # 10 * 1024 * 1024 KB, i.e. a 10 GB size limit.
    size_limit_kb = 10 * 1024 * 1024
    store = Store(
        directory=store_dir,
        model_json_file=model_file,
        max_db_size_in_kb=size_limit_kb,
    )
    hash_box = store.box(ImHash)
    img2vec = Img2Vec(cuda=False, model='efficientnet_b0')


def close():
    """Close the module-level ``store``."""
    store.close()


def find_unique(key: str):
    """Return the single ``ImHash`` whose key equals *key*, or ``None``.

    ``None`` is returned both when nothing matches and when more than one
    record matches; in the latter case a message is printed as well.
    """
    with store_lock:
        matches = hash_box.query(ImHash.key.equals(key)).build().find()
    count = len(matches)
    if count == 1:
        return matches[0]
    if count > 1:
        print('Multiple matches found')
    return None


def find_similar(key: str) -> list[tuple[ImHash, float]]:
    """Return the nearest neighbours of the record stored under *key*.

    Args:
        key: Key of the record whose vector is used as the search target.

    Returns:
        Up to 8 ``(object, score)`` pairs sorted by ascending score, or an
        empty list when ``find_unique(key)`` yields no record.
    """
    target = find_unique(key)
    if target is None:
        # find_unique returns None for an unknown (or ambiguous) key;
        # dereferencing it below would raise AttributeError.
        return []
    with store_lock:
        query = hash_box.query(ImHash.cos_value.nearest_neighbor(target.cos_value, 8)).build()
        results = query.find_with_scores()
    results.sort(key=lambda pair: pair[1])
    return results


def remove(key: str):
    """Remove the record stored under *key*; do nothing if none is found."""
    found = find_unique(key)
    if found is None:
        return
    with store_lock:
        hash_box.remove(found)


def remove_many(keys: list[str]):
    """Remove the records for all *keys* inside one write transaction.

    Args:
        keys: Keys of the records to remove. A key with no single
            matching record is reported on stdout and skipped.

    ``store_lock`` is acquired before the write transaction is opened.
    The other writers in this module take ``store_lock`` first and only
    then start an (implicit) write transaction via ``put``/``remove``;
    opening the transaction first and taking the lock per key afterwards
    inverts that order and can deadlock against them. The key lookup is
    done inline rather than through ``find_unique`` so the lock is not
    re-acquired while already held.
    """
    with store_lock, store.write_tx():
        for k in keys:
            matches = hash_box.query(ImHash.key.equals(k)).build().find()
            if len(matches) > 1:
                print('Multiple matches found')
            if len(matches) != 1:
                print('Hash key "%s" was already gone' % k)
                continue
            hash_box.remove(matches[0].id)

patknight avatar Nov 17 '24 21:11 patknight