milvus
milvus copied to clipboard
[Bug]: Search data is less than expected for RAFT_IVF_FLAT index
Is there an existing issue for this?
- [X] I have searched the existing issues
Environment
- Milvus version:milvus master latest
- Deployment mode(standalone or cluster): both
- MQ type(rocksmq, pulsar or kafka): both
- SDK version(e.g. pymilvus v2.0.0rc2): pymilvus 2.3.0.dev48
- OS(Ubuntu or CentOS):
- CPU/Memory:
- GPU:
- Others:
Current Behavior
Search data is less than expected for RAFT_IVF_FLAT index (dim=1)
[2023-03-22T10:55:46.840Z] index = 'RAFT_IVF_FLAT', params = {'nlist': 128}, auto_id = True, _async = False
[2023-03-22T10:55:46.840Z]
[2023-03-22T10:55:46.840Z] @pytest.mark.tags(CaseLabel.GPU)
[2023-03-22T10:55:46.840Z] @pytest.mark.parametrize("index, params",
[2023-03-22T10:55:46.840Z] zip(ct.all_index_types[9:14],
[2023-03-22T10:55:46.840Z] ct.default_index_params[9:14]))
[2023-03-22T10:55:46.840Z] def test_search_after_different_index_with_min_dim_gpu(self, index, params, auto_id, _async):
[2023-03-22T10:55:46.840Z] """
[2023-03-22T10:55:46.840Z] target: test search after different index with min dim
[2023-03-22T10:55:46.840Z] method: test search after different index and corresponding search params with dim = 1
[2023-03-22T10:55:46.840Z] expected: search successfully with limit(topK)
[2023-03-22T10:55:46.840Z] """
[2023-03-22T10:55:46.840Z] # 1. initialize with data
[2023-03-22T10:55:46.840Z] collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000,
[2023-03-22T10:55:46.840Z] partition_num=1,
[2023-03-22T10:55:46.840Z] auto_id=auto_id,
[2023-03-22T10:55:46.840Z] dim=min_dim, is_index=False)[0:5]
[2023-03-22T10:55:46.840Z] # 2. create index and load
[2023-03-22T10:55:46.840Z] if params.get("m"):
[2023-03-22T10:55:46.840Z] params["m"] = min_dim
[2023-03-22T10:55:46.840Z] if params.get("PQM"):
[2023-03-22T10:55:46.840Z] params["PQM"] = min_dim
[2023-03-22T10:55:46.840Z] default_index = {"index_type": index, "params": params, "metric_type": "L2"}
[2023-03-22T10:55:46.840Z] collection_w.create_index("float_vector", default_index)
[2023-03-22T10:55:46.840Z] collection_w.load()
[2023-03-22T10:55:46.840Z] # 3. search
[2023-03-22T10:55:46.840Z] search_params = cf.gen_search_param(index)
[2023-03-22T10:55:46.840Z] vectors = [[random.random() for _ in range(min_dim)] for _ in range(default_nq)]
[2023-03-22T10:55:46.840Z] for search_param in search_params:
[2023-03-22T10:55:46.840Z] log.info("Searching with search params: {}".format(search_param))
[2023-03-22T10:55:46.840Z] > collection_w.search(vectors[:default_nq], default_search_field,
[2023-03-22T10:55:46.840Z] search_param, default_limit,
[2023-03-22T10:55:46.840Z] default_search_exp, _async=_async,
[2023-03-22T10:55:46.840Z] travel_timestamp=0,
[2023-03-22T10:55:46.840Z] check_task=CheckTasks.check_search_results,
[2023-03-22T10:55:46.840Z] check_items={"nq": default_nq,
[2023-03-22T10:55:46.840Z] "ids": insert_ids,
[2023-03-22T10:55:46.840Z] "limit": default_limit,
[2023-03-22T10:55:46.840Z] "_async": _async})
[2023-03-22T10:55:46.840Z]
[2023-03-22T10:55:46.840Z] testcases/test_search.py:1878:
[2023-03-22T10:55:46.840Z] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[2023-03-22T10:55:46.840Z] utils/wrapper.py:33: in inner_wrapper
[2023-03-22T10:55:46.840Z] res, result = func(*args, **kwargs)
[2023-03-22T10:55:46.840Z] base/collection_wrapper.py:168: in search
[2023-03-22T10:55:46.840Z] check_result = ResponseChecker(res, func_name, check_task, check_items, check,
[2023-03-22T10:55:46.840Z] check/func_check.py:57: in run
[2023-03-22T10:55:46.840Z] result = self.check_search_results(self.response, self.func_name, self.check_items)
[2023-03-22T10:55:46.840Z] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[2023-03-22T10:55:46.840Z]
[2023-03-22T10:55:46.840Z] search_res = <pymilvus.orm.search.SearchResult object at 0x7f20bec7ed60>
[2023-03-22T10:55:46.840Z] func_name = 'search'
[2023-03-22T10:55:46.840Z] check_items = {'_async': False, 'ids': [440266085618393944, 440266085618393945, 440266085618393946, 440266085618393947, 440266085618393948, 440266085618393949, ...], 'limit': 10, 'nq': 2}
[2023-03-22T10:55:46.840Z]
[2023-03-22T10:55:46.840Z] @staticmethod
[2023-03-22T10:55:46.840Z] def check_search_results(search_res, func_name, check_items):
[2023-03-22T10:55:46.840Z] """
[2023-03-22T10:55:46.840Z] target: check the search results
[2023-03-22T10:55:46.840Z] method: 1. check the query number
[2023-03-22T10:55:46.840Z] 2. check the limit(topK) and ids
[2023-03-22T10:55:46.840Z] 3. check the distance
[2023-03-22T10:55:46.840Z] expected: check the search is ok
[2023-03-22T10:55:46.840Z] """
[2023-03-22T10:55:46.840Z] log.info("search_results_check: checking the searching results")
[2023-03-22T10:55:46.840Z] if func_name != 'search':
[2023-03-22T10:55:46.840Z] log.warning("The function name is {} rather than {}".format(func_name, "search"))
[2023-03-22T10:55:46.840Z] if len(check_items) == 0:
[2023-03-22T10:55:46.840Z] raise Exception("No expect values found in the check task")
[2023-03-22T10:55:46.840Z] if check_items.get("_async", None):
[2023-03-22T10:55:46.840Z] if check_items["_async"]:
[2023-03-22T10:55:46.840Z] search_res.done()
[2023-03-22T10:55:46.840Z] search_res = search_res.result()
[2023-03-22T10:55:46.840Z] if len(search_res) != check_items["nq"]:
[2023-03-22T10:55:46.840Z] log.error("search_results_check: Numbers of query searched (%d) "
[2023-03-22T10:55:46.840Z] "is not equal with expected (%d)"
[2023-03-22T10:55:46.840Z] % (len(search_res), check_items["nq"]))
[2023-03-22T10:55:46.840Z] assert len(search_res) == check_items["nq"]
[2023-03-22T10:55:46.841Z] else:
[2023-03-22T10:55:46.841Z] log.info("search_results_check: Numbers of query searched is correct")
[2023-03-22T10:55:46.841Z] for hits in search_res:
[2023-03-22T10:55:46.841Z] if (len(hits) != check_items["limit"]) \
[2023-03-22T10:55:46.841Z] or (len(hits.ids) != check_items["limit"]):
[2023-03-22T10:55:46.841Z] log.error("search_results_check: limit(topK) searched (%d) "
[2023-03-22T10:55:46.841Z] "is not equal with expected (%d)"
[2023-03-22T10:55:46.841Z] % (len(hits), check_items["limit"]))
[2023-03-22T10:55:46.841Z] > assert len(hits) == check_items["limit"]
[2023-03-22T10:55:46.841Z] E AssertionError
Expected Behavior
The number of search results matches the expected count for the RAFT_IVF_FLAT index
dim = 1
Steps To Reproduce
https://jenkins.milvus.io:18080/blue/organizations/jenkins/milvus-gpu-ci/detail/PR-22900/3/pipeline/145
@pytest.mark.tags(CaseLabel.GPU)
@pytest.mark.parametrize("index, params",
zip(ct.all_index_types[9:14],
ct.default_index_params[9:14]))
def test_search_after_different_index_with_min_dim_gpu(self, index, params, auto_id, _async):
"""
target: test search after different index with min dim
method: test search after different index and corresponding search params with dim = 1
expected: search successfully with limit(topK)
"""
# 1. initialize with data
collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000,
partition_num=1,
auto_id=auto_id,
dim=min_dim, is_index=False)[0:5]
# 2. create index and load
if params.get("m"):
params["m"] = min_dim
if params.get("PQM"):
params["PQM"] = min_dim
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
collection_w.create_index("float_vector", default_index)
collection_w.load()
# 3. search
search_params = cf.gen_search_param(index)
vectors = [[random.random() for _ in range(min_dim)] for _ in range(default_nq)]
for search_param in search_params:
log.info("Searching with search params: {}".format(search_param))
collection_w.search(vectors[:default_nq], default_search_field,
search_param, default_limit,
default_search_exp, _async=_async,
travel_timestamp=0,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async})
Milvus Log
artifacts-milvus-distributed-PR-22900-3-pymilvus-e2e-logs.tar.gz
Anything else?
- limit(topK) searched (4) is not equal with expected (10) (func_check.py:253)
- collection name: search_collection_zARq99bB
"RAFT_IVF_PQ" also has this issue: test_search_after_different_index_with_min_dim_gpu search_results_check: limit(topK) searched (1) is not equal with expected (10) (func_check.py:253)
I think dim=1 may be the cause of the issue; the other cases pass.
Assert "failed to search, not match the error type in knowhere" at /home/caiyd/vec/milvus/internal/core/src/index/VectorMemIndex.cpp:116
[2023/03/24 17:50:48.512 +08:00] [DEBUG] [querynode/segment.go:337] [tr/cgoSearch] [traceID=f03effdc4742fcdc816ebafd3c5e9c80] [msg="finish cgoSearch"] [duration=2.328878ms]
[2023/03/24 17:50:48.512 +08:00] [WARN] [querynode/cgo_helper.go:57] ["Search failed, C Runtime Exception: [UnexpectedError] Assert \"failed to search, not match the error type in knowhere\" at /home/caiyd/vec/milvus/internal/core/src/index/VectorMemIndex.cpp:116\n\n"]
2023-03-24 17:50:48,513 | WARNING | default | [KNOWHERE][Search][milvus] RAFT inner error, cuBLAS error encountered at: file=/home/caiyd/vec/milvus/cmake_build/3rdparty_download/raft-src/cpp/include/raft/linalg/detail/cublas_wrappers.hpp line=299: call='cublasSetStream(handle, stream)', Reason=1:CUBLAS_STATUS_NOT_INITIALIZED
Obtained 14 stack frames
#0 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZN4raft9exception18collect_call_stackEv+0x52) [0x7f0c509cfff2]
#1 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZN4raft12cublas_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0xd1) [0x7f0c509d0a71]
#2 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZN4raft6linalg6detail4gemmIfLb0EEEvRKNS_16device_resourcesEbbiiiPKT_S8_iS8_iS8_PS6_iP11CUstream_st+0x6e8) [0x7f0c50a237f8]
#3 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZN4raft7spatial3knn8ivf_flat6detail11search_implIfflEEvRKNS_16device_resourcesERKNS_9neighbors8ivf_flat5indexIT_T1_EEPKSB_jjjbPSC_PT0_PN3rmm2mr22device_memory_resourceE+0x453) [0x7f0c50b36d53]
#4 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZNK8knowhere16RaftIvfIndexNodeIN4raft9neighbors8ivf_flat5indexIflEEE6SearchERKNS_7DataSetERKNS_6ConfigERKNS_10BitsetViewE+0xc0f) [0x7f0c50b382df]
#5 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZNSt17_Function_handlerIFSt10unique_ptrINSt13__future_base12_Result_baseENS2_8_DeleterEEvENS1_12_Task_setterIS0_INS1_7_ResultIN8knowhere8expectedISt10shared_ptrINS8_7DataSetEENS8_6StatusEEEEES3_EZNS1_11_Task_stateIZNS8_10ThreadPool4pushIZNKS8_26IndexNodeThreadPoolWrapper6SearchERKSB_RKNS8_6ConfigERKNS8_10BitsetViewEEUlvE_JEEESt6futureIDTclfp_spfp0_EEEOT_DpOT0_EUliE_SaIiEFSE_iEE6_M_runEOiEUlvE_SE_EEE9_M_invokeERKSt9_Any_data+0x54) [0x7f0c509a44e4]
#6 in /home/caiyd/vec/milvus/internal/core/output/lib/librocksdb.so.6(_ZNSt13__future_base13_State_baseV29_M_do_setEPSt8functionIFSt10unique_ptrINS_12_Result_baseENS3_8_DeleterEEvEEPb+0x32) [0x7f0c6947e202]
#7 in /lib/x86_64-linux-gnu/libpthread.so.0(+0x114df) [0x7f0c6970b4df]
#8 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZNSt13__future_base11_Task_stateIZN8knowhere10ThreadPool4pushIZNKS1_26IndexNodeThreadPoolWrapper6SearchERKNS1_7DataSetERKNS1_6ConfigERKNS1_10BitsetViewEEUlvE_JEEESt6futureIDTclfp_spfp0_EEEOT_DpOT0_EUliE_SaIiEFNS1_8expectedISt10shared_ptrIS5_ENS1_6StatusEEEiEE6_M_runEOi+0x118) [0x7f0c5096b978]
#9 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZNSt17_Function_handlerIFviEZN4ctpl11thread_pool4pushIZN8knowhere10ThreadPool4pushIZNKS4_26IndexNodeThreadPoolWrapper6SearchERKNS4_7DataSetERKNS4_6ConfigERKNS4_10BitsetViewEEUlvE_JEEESt6futureIDTclfp_spfp0_EEEOT_DpOT0_EUliE_EESI_IDTclfp_Li0EEEESM_EUliE_E9_M_invokeERKSt9_Any_dataOi+0x37) [0x7f0c5096b837]
#10 in /home/caiyd/vec/milvus/internal/core/output/lib/libknowhere.so(_ZZN4ctpl11thread_pool10set_threadEiENKUlvE_clEv+0xbd) [0x7f0c5083468d]
#11 in /lib/x86_64-linux-gnu/libstdc++.so.6(+0xd6de4) [0x7f0c67673de4]
#12 in /lib/x86_64-linux-gnu/libpthread.so.0(+0x8609) [0x7f0c69702609]
#13 in /lib/x86_64-linux-gnu/libc.so.6(clone+0x43) [0x7f0c6789e133]
/assign @Presburger
/unassign
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.
Rotten issues close after 30d of inactivity. Reopen the issue with /reopen
.
is this fixed?
is this fixed?
No, this issue still exists. https://jenkins.milvus.io:18080/blue/organizations/jenkins/milvus-gpu-ci/detail/PR-23642/8/pipeline
Fixed and closed.