milvus-sdk-java icon indicating copy to clipboard operation
milvus-sdk-java copied to clipboard

Error when set limit only without topK when search

Open topikachu opened this issue 6 months ago • 8 comments

I get the error io.milvus.v2.exception.MilvusClientException: topk [0] is invalid, it should be in range [1, 16384], but got 0

when I set only limit, without topK, on a search request.


import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.DropCollectionReq;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.request.SearchReq;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.response.InsertResp;
import org.junit.jupiter.api.Test;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import org.testcontainers.milvus.MilvusContainer;

import java.sql.SQLException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;


/**
 * Reproduction case for a search request that sets only {@code limit} and never sets
 * {@code topK}.
 *
 * <p>The test starts a Milvus container, creates a quick-setup collection with
 * 5-dimensional vectors, inserts ten rows and then runs a filtered search with
 * {@code limit(1)}. It asserts that the search returns a non-empty result list.
 */
@Testcontainers
public class LimitTest {
    /** Milvus server started by Testcontainers for this test class. */
    @Container
    static MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.11");


    /**
     * Runs a search that specifies {@code limit} and {@code offset} but no {@code topK}
     * and expects at least one result list to come back.
     */
    @Test
    void testLimit() throws SQLException {
        // 1. Connect to Milvus server
        ConnectConfig connectConfig = ConnectConfig.builder()
                .uri(milvus.getEndpoint())
//                .token(TOKEN)
                .build();

        MilvusClientV2 client = new MilvusClientV2(connectConfig);
        try {
            String collectionName = "quick_setup";

            // Start from a clean state: drop the collection before re-creating it.
            var delReq = DropCollectionReq.builder()
                    .collectionName(collectionName)
                    .build();
            client.dropCollection(delReq);

            // 2. Create a collection in quick setup mode
            CreateCollectionReq quickSetupReq = CreateCollectionReq.builder()
                    .collectionName(collectionName)
                    .dimension(5)
                    .build();

            client.createCollection(quickSetupReq);

            // 3. Insert ten rows, each with an id, a 5-dimensional vector and a color tag
            Gson gson = new Gson();
            List<JsonObject> data = Arrays.asList(
                    gson.fromJson("{\"id\": 0, \"vector\": [0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f], \"color\": \"pink_8682\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 1, \"vector\": [0.19886812562848388f, 0.06023560599112088f, 0.6976963061752597f, 0.2614474506242501f, 0.838729485096104f], \"color\": \"red_7025\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 2, \"vector\": [0.43742130801983836f, -0.5597502546264526f, 0.6457887650909682f, 0.7894058910881185f, 0.20785793220625592f], \"color\": \"orange_6781\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 3, \"vector\": [0.3172005263489739f, 0.9719044792798428f, -0.36981146090600725f, -0.4860894583077995f, 0.95791889146345f], \"color\": \"pink_9298\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 4, \"vector\": [0.4452349528804562f, -0.8757026943054742f, 0.8220779437047674f, 0.46406290649483184f, 0.30337481143159106f], \"color\": \"red_4794\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 5, \"vector\": [0.985825131989184f, -0.8144651566660419f, 0.6299267002202009f, 0.1206906911183383f, -0.1446277761879955f], \"color\": \"yellow_4222\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 6, \"vector\": [0.8371977790571115f, -0.015764369584852833f, -0.31062937026679327f, -0.562666951622192f, -0.8984947637863987f], \"color\": \"red_9392\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 7, \"vector\": [-0.33445148015177995f, -0.2567135004164067f, 0.8987539745369246f, 0.9402995886420709f, 0.5378064918413052f], \"color\": \"grey_8510\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 8, \"vector\": [0.39524717779832685f, 0.4000257286739164f, -0.5890507376891594f, -0.8650502298996872f, -0.6140360785406336f], \"color\": \"white_9381\"}", JsonObject.class),
                    gson.fromJson("{\"id\": 9, \"vector\": [0.5718280481994695f, 0.24070317428066512f, -0.3737913482606834f, -0.06726932177492717f, -0.6980531615588608f], \"color\": \"purple_4976\"}", JsonObject.class)
            );

            InsertReq insertReq = InsertReq.builder()
                    .collectionName(collectionName)
                    .data(data)
                    .build();

            InsertResp insertResp = client.insert(insertReq);
            System.out.print(insertResp.getInsertCnt());

            // 4. Search with limit/offset only; topK is intentionally left unset because
            //    that combination is the scenario under test.
            var searchReq = SearchReq.builder()
                    .collectionName(collectionName)
                    .data(Collections.singletonList(new FloatVec(new float[]{1.0f, 2.0f, 3.0f, 4.0f, 5.0f})))
                    .limit(1)
                    .filter(" color like 'red%'")
                    .outputFields(List.of("id", "color", "vector"))
                    .offset(0)
                    .searchParams(Map.of("hints", "iterative_filter"))
                    .build();

            var searchResults = client.search(searchReq);

            assertThat(searchResults.getSearchResults()).isNotEmpty();
        } finally {
            // Release the client connection even when a request or the assertion fails.
            client.close();
        }
    }
}


But the documentation says: "The number of records to return in the search result. This parameter uses the same syntax as the limit parameter, so you should only set one of them."

    <dependency>
        <groupId>io.milvus</groupId>
        <artifactId>milvus-sdk-java</artifactId>
        <version>2.5.10</version>
    </dependency>

image: milvusdb/milvus:v2.5.11

topikachu avatar Jun 27 '25 02:06 topikachu

/assign @yhmo

yanliang567 avatar Jun 27 '25 03:06 yanliang567

In the Java SDK, "topk" and "limit" serve different purposes: "topk" is the number of items returned by search(), while "limit" takes effect together with "offset".

The documentation follows the Python SDK, so it is not correct for the Java SDK.

In future versions of the Java SDK, we will deprecate the "topk" parameter and use "limit" for both "topk" and "offset". For now, you must provide a value for the "topk" parameter.

yhmo avatar Jun 27 '25 03:06 yhmo

In Java SDK, the "topk" and "limit" is for different purpose. "topk" is the count of items to be returned for search() "limit" takes effect for "offset"

The document is following the python sdk, so it is not correct.

In the future versions of Java SDK, we will deprecate the "topk" parameter and use the "limit" for both "topk" and "offset". Now, you must input a value for the "topk" parameter.

Thanks. So, how do I paginate the results when searching?

.topK(size) .offset(offset)

??

topikachu avatar Jun 27 '25 03:06 topikachu

topk should have the same meaning as limit. This definition is definitely wrong @yhmo

xiaofan-luan avatar Jun 27 '25 04:06 xiaofan-luan

topk should be the same meaning of limit. This definition is definitely wrong @yhmo

Thanks for clarifying.

A quick-and-dirty fix: add the method below to SearchReq:

    /**
     * Returns {@code topK} when it is greater than zero; otherwise returns
     * {@code limit} cast to {@code int}.
     */
    public int getTopK() {
        if (topK > 0) {
            return topK;
        }
        // Non-positive topK: use limit as the result count instead.
        return (int) limit;
    }

What do you think?

topikachu avatar Jun 27 '25 04:06 topikachu

Please check #1435

topikachu avatar Jun 27 '25 05:06 topikachu

.topK(size) .offset(offset)

This works:

SearchReq.builder()
.topK(size)
.offset(offset)

yhmo avatar Jun 27 '25 06:06 yhmo

These PRs deprecate the topK parameter of SearchReq/HybridSearchReq/SearchIteratorReq and replace it with limit: https://github.com/milvus-io/milvus-sdk-java/pull/1439 https://github.com/milvus-io/milvus-sdk-java/pull/1438

The topK parameter still works, but the limit parameter is recommended.

Usage before this pr:

SearchReq.builder()
.topK(size)
.offset(offset)

Usage after this pr:

SearchReq.builder()
.limit(size)
.offset(offset)

This commit will be available in the next SDK minor version v2.5.11/v2.6.1

yhmo avatar Jun 27 '25 10:06 yhmo

With older versions of the Java SDK, this example shows that topK/offset work well (I tested Java SDK v2.4.9 with this example, and the result is as expected):

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.DropCollectionReq;
import io.milvus.v2.service.collection.request.HasCollectionReq;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.request.QueryReq;
import io.milvus.v2.service.vector.request.SearchReq;
import io.milvus.v2.service.vector.request.data.BaseVector;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.response.QueryResp;
import io.milvus.v2.service.vector.response.SearchResp;

import java.util.*;

/**
 * Stand-alone example showing that {@code topK} combined with {@code offset} pages
 * through search results.
 *
 * <p>On first run it creates the {@value #COLLECTION_NAME} collection (int64 primary key,
 * float vector, JSON metadata), fills it with random vectors, and then runs two searches
 * with the same query vector: one with {@code topk=20, offset=0} and one with
 * {@code topk=10, offset=5}.
 */
public class MilvusTest {
    private static final String COLLECTION_NAME = "example_to_search_offset";
    private static final String ID_FIELD = "id";
    private static final String VECTOR_FIELD = "vector";
    private static final String JSON_FIELD = "metadata";
    private static final Integer VECTOR_DIM = 128;

    /**
     * Builds a vector of random floats.
     *
     * @param dimension number of elements to generate
     * @return a new list holding {@code dimension} values drawn from {@link Random#nextFloat()}
     */
    public static List<Float> generateFloatVector(int dimension) {
        Random ran = new Random();
        List<Float> vector = new ArrayList<>();
        for (int i = 0; i < dimension; ++i) {
            vector.add(ran.nextFloat());
        }
        return vector;
    }

    /**
     * Runs a query with the given filter expression and prints the id and metadata
     * of every returned entity.
     *
     * @param client connected Milvus client
     * @param expr   filter expression passed to the query
     */
    private static void queryWithExpr(MilvusClientV2 client, String expr) {
        QueryResp queryRet = client.query(QueryReq.builder()
                .collectionName(COLLECTION_NAME)
                .filter(expr)
                .outputFields(Arrays.asList(ID_FIELD, JSON_FIELD))
                .build());
        System.out.println("\nQuery with expression: " + expr);
        List<QueryResp.QueryResult> records = queryRet.getQueryResults();
        for (QueryResp.QueryResult record : records) {
            System.out.println(record.getEntity());
        }
        System.out.println("=============================================================");
    }

    /**
     * Searches the collection with the given {@code topk} and {@code offset} and prints
     * the ids of all hits.
     *
     * @param client  connected Milvus client
     * @param vectors query vectors
     * @param topk    value passed to {@code SearchReq.topK}
     * @param offset  value passed to {@code SearchReq.offset}
     * @return ids of the hits for all query vectors, in the order they were returned
     */
    private static List<Long> search(MilvusClientV2 client, List<BaseVector> vectors, int topk, int offset) {
        System.out.println(String.format("\nSearch with topk=%d, offset=%d", topk, offset));
        List<String> outputFields = Arrays.asList(ID_FIELD);
        SearchResp resp = client.search(SearchReq.builder()
                .collectionName(COLLECTION_NAME)
                .topK(topk)
                .offset(offset)
                .data(vectors)
                .annsField(VECTOR_FIELD)
                .consistencyLevel(ConsistencyLevel.STRONG)
                .outputFields(outputFields)
                .build());

        List<Long> ids = new ArrayList<>();
        System.out.println("Search result ids:");
        List<List<SearchResp.SearchResult>> searchResults = resp.getSearchResults();
        for (List<SearchResp.SearchResult> results : searchResults) {
            for (SearchResp.SearchResult result : results) {
                ids.add((Long)result.getId());
            }
        }

        System.out.println(ids);
        System.out.println("Search done");
        return ids;
    }

    /**
     * Entry point: prepares the collection when it does not exist yet, then runs the
     * two searches whose results are meant to be compared.
     */
    public static void main(String[] args) {
        ConnectConfig config = ConnectConfig.builder()
                .uri("http://localhost:19530")
                .build();
        MilvusClientV2 client = new MilvusClientV2(config);

        try {
            if (!client.hasCollection(HasCollectionReq.builder().collectionName(COLLECTION_NAME).build())) {
                // Create collection
                CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
                        .build();
                collectionSchema.addField(AddFieldReq.builder()
                        .fieldName(ID_FIELD)
                        .dataType(DataType.Int64)
                        .isPrimaryKey(Boolean.TRUE)
                        .build());
                collectionSchema.addField(AddFieldReq.builder()
                        .fieldName(VECTOR_FIELD)
                        .dataType(DataType.FloatVector)
                        .dimension(VECTOR_DIM)
                        .build());
                collectionSchema.addField(AddFieldReq.builder()
                        .fieldName(JSON_FIELD)
                        .dataType(DataType.JSON)
                        .build());

                List<IndexParam> indexes = new ArrayList<>();
                indexes.add(IndexParam.builder()
                        .fieldName(VECTOR_FIELD)
                        .indexType(IndexParam.IndexType.FLAT)
                        .metricType(IndexParam.MetricType.COSINE)
                        .build());

                CreateCollectionReq requestCreate = CreateCollectionReq.builder()
                        .collectionName(COLLECTION_NAME)
                        .collectionSchema(collectionSchema)
                        .indexParams(indexes)
                        .consistencyLevel(ConsistencyLevel.BOUNDED)
                        .build();
                client.createCollection(requestCreate);
                System.out.println("Collection created");

                // Insert rows
                Gson gson = new Gson();
                int count = 10000;
                for (int i = 0; i < count; i++) {
                    JsonObject row = new JsonObject();
                    row.addProperty(ID_FIELD, i);
                    row.add(VECTOR_FIELD, gson.toJsonTree(generateFloatVector(VECTOR_DIM)));

                    JsonObject metadata = new JsonObject();
                    metadata.addProperty("path", String.format("\\root/abc/path%d", i));
                    metadata.addProperty("size", i);
                    if (i % 7 == 0) {
                        metadata.addProperty("special", true);
                    }
                    metadata.add("flags", gson.toJsonTree(Arrays.asList(i, i + 1, i + 2)));
                    row.add(JSON_FIELD, metadata);
                    System.out.println(metadata);

                    client.insert(InsertReq.builder()
                            .collectionName(COLLECTION_NAME)
                            .data(Collections.singletonList(row))
                            .build());
                }
                // "%d" is required here: the previous "% rows" was parsed as an unknown
                // conversion and threw UnknownFormatConversionException.
                System.out.println(String.format("%d rows inserted", count));
            }

            List<BaseVector> vectors = new ArrayList<>();
            vectors.add(new FloatVec(generateFloatVector(VECTOR_DIM)));

            // Same query vector for both calls so the second page can be compared
            // against the first result list.
            search(client, vectors, 20, 0);
            search(client, vectors, 10, 5);
        } finally {
            // Close the client even when setup or a search fails.
            client.close();
        }
    }
}

From the result, we can see that with "topk=10, offset=5" the id list starts at the sixth item (index 5) of the result for "topk=20, offset=0":

Search with topk=20, offset=0
Search result ids:
[63, 78, 25, 52, 96, 0, 73, 13, 67, 76, 18, 95, 7, 45, 87, 69, 94, 99, 5, 27]
Search done

Search with topk=10, offset=5
Search result ids:
[0, 73, 13, 67, 76, 18, 95, 7, 45, 87]
Search done

yhmo avatar Jul 04 '25 10:07 yhmo

Already fixed in v2.5.11 and v2.6.1; closing this issue.

yhmo avatar Aug 14 '25 08:08 yhmo