cloud-vectordb-examples
cloud-vectordb-examples copied to clipboard
Unable to insert vector data into a collection created in the cloud when using it with Spring AI
I am able to create the collection properly and search it, and I am able to insert via the search API. Here is the code snippet that is trying to insert the data: ` package com.learn.openai.demo.service.config;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TextSplitter;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.milvus.MilvusVectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import io.milvus.client.MilvusServiceClient;
import io.milvus.param.ConnectParam;
import io.milvus.param.collection.CreateCollectionParam;
import io.milvus.param.collection.FieldType;
import io.milvus.param.collection.LoadCollectionParam;
import io.milvus.param.index.CreateIndexParam;
import io.milvus.grpc.DataType;
import java.util.List;
@Slf4j
@Configuration
public class MilvusVectorStoreConfig {
// Connection and collection settings injected from Spring properties.
// None of the placeholders declares a default value.

// Milvus host, from spring.ai.vectorstore.milvus.client.host.
@Value("${spring.ai.vectorstore.milvus.client.host}")
private String host;
// Milvus port, from spring.ai.vectorstore.milvus.client.port.
@Value("${spring.ai.vectorstore.milvus.client.port}")
private int port;
// Milvus endpoint URI, from spring.ai.vectorstore.milvus.client.uri.
@Value("${spring.ai.vectorstore.milvus.client.uri}")
private String uri;
// Access token passed to the client, from spring.ai.vectorstore.milvus.client.token.
@Value("${spring.ai.vectorstore.milvus.client.token}")
private String token;
// Name of the collection that is created, loaded, searched and filled by this class.
@Value("${spring.ai.vectorstore.milvus.collection-name}")
private String collectionName;
// Name of an io.milvus.param.MetricType constant; resolved with MetricType.valueOf when the index is created.
@Value("${spring.ai.vectorstore.milvus.metric-type}")
private String metricType;
/**
 * Exposes the Milvus client as a Spring bean.
 *
 * <p>The connection is configured from the injected {@code host}, {@code port},
 * {@code uri} and {@code token} values. No database name is set.
 *
 * @return a client built from those connection settings
 */
@Bean
public MilvusServiceClient milvusServiceClient() {
    ConnectParam.Builder connection = ConnectParam.newBuilder();
    connection.withHost(host);
    connection.withPort(port);
    connection.withUri(uri);
    connection.withToken(token);
    // The explicit database name is currently disabled:
    // connection.withDatabaseName("proj-5670b6f942143eb0c19ad7");
    ConnectParam connectParam = connection.build();
    return new MilvusServiceClient(connectParam);
}
/**
 * Builds the {@link MilvusVectorStore} bean and seeds it with documents when it is empty.
 *
 * <p>Steps, in order: detect the embedding dimension, create the collection if it is
 * missing, build the store, load the collection, probe it with a similarity search and,
 * when the probe returns nothing, load the configured documents.
 *
 * @param milvusServiceClient   client used for collection management and by the store
 * @param embeddingModel        model used to embed documents and queries
 * @param vectorStoreProperties supplies the documents to load into an empty collection
 * @return the configured vector store
 * @throws IllegalArgumentException if the configured metric type is not a valid
 *                                  {@code io.milvus.param.MetricType} name
 * @throws RuntimeException         if collection creation, loading or document ingestion fails
 */
@Bean
public MilvusVectorStore milvusVectorStore(MilvusServiceClient milvusServiceClient, EmbeddingModel embeddingModel, VectorStoreProperties vectorStoreProperties) {
    // Get the actual embedding dimension from the model
    int actualDimension = getEmbeddingDimension(embeddingModel);
    log.info("Detected embedding dimension: {}", actualDimension);
    // Create collection if it doesn't exist (the created/existed flag is not needed here)
    createCollectionIfNotExists(milvusServiceClient, actualDimension);
    MilvusVectorStore store = MilvusVectorStore.builder(milvusServiceClient, embeddingModel)
            .collectionName(collectionName)
            // Search with the same metric the index is created with; otherwise the
            // store falls back to its own default metric, which may not match the index.
            .metricType(io.milvus.param.MetricType.valueOf(metricType))
            .build();
    // Load the collection into memory
    loadCollection(milvusServiceClient);
    // Check if collection has data by trying a similarity search
    boolean collectionHasData = checkCollectionHasData(store);
    if (collectionHasData) {
        log.info("Milvus collection '{}' already has data, skipping data loading", collectionName);
    } else {
        log.info("Milvus collection '{}' is empty, loading movie data...", collectionName);
        loadMovieData(store, vectorStoreProperties);
    }
    return store;
}
/**
 * Determines the vector size produced by the embedding model.
 *
 * <p>Embeds the literal text {@code "test"} and returns the length of the result.
 * If that attempt throws, a warning is logged and 1536 is returned instead.
 *
 * @param embeddingModel model to probe
 * @return the length of the probe embedding, or 1536 when probing fails
 */
private int getEmbeddingDimension(EmbeddingModel embeddingModel) {
    int dimension;
    try {
        // One throwaway embedding is enough to learn the vector length.
        dimension = embeddingModel.embed("test").length;
    } catch (Exception probeFailure) {
        log.warn("Could not detect embedding dimension from model, using default 1536", probeFailure);
        dimension = 1536; // Default to 1536 (common for OpenAI models)
    }
    return dimension;
}
/**
 * Creates the collection and its vector index when the collection does not exist yet.
 *
 * <p>The schema has an auto-generated {@code id} primary key plus {@code doc_id},
 * {@code content}, {@code metadata} and {@code embedding} fields. The index on
 * {@code embedding} is IVF_FLAT with {@code nlist=1024} and the configured metric type.
 *
 * <p>The Milvus client reports failures through the returned {@code R} object, so every
 * response is checked; a non-success status is turned into an exception instead of being
 * logged as success.
 *
 * @param milvusServiceClient client used to talk to Milvus
 * @param dimension           dimension of the {@code embedding} vector field
 * @return {@code true} if the collection was created, {@code false} if it already existed
 * @throws RuntimeException if the existence check, collection creation or index creation fails
 */
private boolean createCollectionIfNotExists(MilvusServiceClient milvusServiceClient, int dimension) {
    try {
        // Check if collection exists
        io.milvus.param.R<Boolean> existsResponse = milvusServiceClient.hasCollection(
                io.milvus.param.collection.HasCollectionParam.newBuilder()
                        .withCollectionName(collectionName)
                        .build());
        ensureSuccess(existsResponse, "hasCollection");
        // getData() can be null, so avoid auto-unboxing it.
        if (Boolean.TRUE.equals(existsResponse.getData())) {
            log.info("Collection '{}' already exists", collectionName);
            return false; // Collection already existed
        }

        log.info("Creating Milvus collection: {} with dimension: {}", collectionName, dimension);
        // Create collection with all required fields for Spring AI
        CreateCollectionParam createCollectionParam = CreateCollectionParam.newBuilder()
                .withCollectionName(collectionName)
                .withDescription("Movie embeddings collection")
                .addFieldType(FieldType.newBuilder()
                        .withName("id")
                        .withDataType(DataType.Int64)
                        .withPrimaryKey(true)
                        .withAutoID(true)
                        .build())
                .addFieldType(FieldType.newBuilder()
                        .withName("doc_id")
                        .withDataType(DataType.VarChar)
                        .withMaxLength(65535)
                        .build())
                .addFieldType(FieldType.newBuilder()
                        .withName("content")
                        .withDataType(DataType.VarChar)
                        .withMaxLength(65535)
                        .build())
                .addFieldType(FieldType.newBuilder()
                        .withName("metadata")
                        .withDataType(DataType.JSON)
                        .build())
                .addFieldType(FieldType.newBuilder()
                        .withName("embedding")
                        .withDataType(DataType.FloatVector)
                        .withDimension(dimension)
                        .build())
                .build();
        ensureSuccess(milvusServiceClient.createCollection(createCollectionParam), "createCollection");

        // Create index on embedding field
        CreateIndexParam createIndexParam = CreateIndexParam.newBuilder()
                .withCollectionName(collectionName)
                .withFieldName("embedding")
                .withIndexType(io.milvus.param.IndexType.IVF_FLAT)
                .withMetricType(io.milvus.param.MetricType.valueOf(metricType))
                .withExtraParam("{\"nlist\":1024}")
                .build();
        ensureSuccess(milvusServiceClient.createIndex(createIndexParam), "createIndex");

        log.info("Successfully created collection and index: {}", collectionName);
        return true; // Collection was created
    } catch (Exception e) {
        log.error("Error creating collection: {}", e.getMessage(), e);
        throw new RuntimeException("Failed to create Milvus collection", e);
    }
}

/** Throws if a Milvus response is missing or carries a non-success status. */
private static void ensureSuccess(io.milvus.param.R<?> response, String operation) {
    if (response == null) {
        throw new IllegalStateException("Milvus " + operation + " returned no response");
    }
    if (response.getStatus() != io.milvus.param.R.Status.Success.getCode()) {
        throw new IllegalStateException(
                "Milvus " + operation + " failed: " + response.getMessage(), response.getException());
    }
}
/**
 * Asks Milvus to load the configured collection.
 *
 * <p>The client reports failures through the returned {@code R} object, so its status is
 * checked before success is logged.
 *
 * @param milvusServiceClient client used to talk to Milvus
 * @throws RuntimeException if the load request fails
 */
private void loadCollection(MilvusServiceClient milvusServiceClient) {
    try {
        log.info("Loading collection '{}' into memory...", collectionName);
        LoadCollectionParam loadCollectionParam = LoadCollectionParam.newBuilder()
                .withCollectionName(collectionName)
                .build();
        io.milvus.param.R<?> response = milvusServiceClient.loadCollection(loadCollectionParam);
        // A non-success status means the collection is not loaded, so do not report success.
        if (response == null || response.getStatus() != io.milvus.param.R.Status.Success.getCode()) {
            String detail = (response == null) ? "no response" : response.getMessage();
            Exception cause = (response == null) ? null : response.getException();
            throw new IllegalStateException("Milvus loadCollection failed: " + detail, cause);
        }
        log.info("Successfully loaded collection '{}' into memory", collectionName);
    } catch (Exception e) {
        log.error("Error loading collection: {}", e.getMessage(), e);
        throw new RuntimeException("Failed to load Milvus collection", e);
    }
}
/**
 * Probes the store with a similarity search for {@code "Movie"} to decide whether it
 * already contains documents.
 *
 * <p>A failing search is still treated as "no data" (best effort), but it is logged as a
 * warning with the full exception so that connection or schema problems are not mistaken
 * for an empty collection.
 *
 * @param store the vector store to probe
 * @return {@code true} if the search returned at least one document; {@code false} if it
 *         returned nothing or failed
 */
private boolean checkCollectionHasData(MilvusVectorStore store) {
    try {
        // Try to perform a simple similarity search to check if collection has data
        List<Document> results = store.similaritySearch("Movie");
        return results != null && !results.isEmpty();
    } catch (Exception e) {
        log.warn("Similarity search probe on collection '{}' failed; treating it as empty: {}",
                collectionName, e.getMessage(), e);
        return false;
    }
}
/**
 * Reads, splits and stores every document listed in the properties.
 *
 * <p>Each document is read with {@link TikaDocumentReader}, split with a
 * {@link TokenTextSplitter} and added to the store, followed by a one-second pause.
 *
 * @param store                 vector store that receives the split documents
 * @param vectorStoreProperties supplies the documents to load
 * @throws RuntimeException if a document cannot be loaded or the thread is interrupted
 *                          during the pause; in the latter case the interrupt status is
 *                          restored before throwing
 */
private void loadMovieData(MilvusVectorStore store, VectorStoreProperties vectorStoreProperties) {
    vectorStoreProperties.getDocumentsToLoad().forEach(document -> {
        log.info("Loading document: {}", document);
        try {
            TikaDocumentReader documentReader = new TikaDocumentReader(document);
            List<Document> documents = documentReader.get();
            TextSplitter textSplitter = new TokenTextSplitter();
            List<Document> splitDocs = textSplitter.apply(documents);
            if (!splitDocs.isEmpty()) {
                log.info("First document to insert: {}", splitDocs.get(0));
            }
            store.add(splitDocs);
            log.info("Added {} split documents to Milvus", splitDocs.size());
            // Rate limiting to avoid overwhelming the embedding API
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Restore the interrupt status so callers further up can still observe it.
            Thread.currentThread().interrupt();
            log.error("Interrupted while loading document: {}", document, e);
            throw new RuntimeException("Failed to load document: " + document, e);
        } catch (Exception e) {
            log.error("Error loading document: {}", document, e);
            throw new RuntimeException("Failed to load document: " + document, e);
        }
    });
    log.info("Successfully loaded all movie data into Milvus collection: {}", collectionName);
}
} `
build.gradle dependency for spring milvus -->
implementation 'org.springframework.ai:spring-ai-starter-vector-store-milvus'
I am happy to share more data as needed.