cloud-vectordb-examples icon indicating copy to clipboard operation
cloud-vectordb-examples copied to clipboard

Unable to insert vector data into the collection created in the cloud when using Spring AI

Open codefarm0 opened this issue 6 months ago • 1 comments

I am able to create the collection properly and search it. I am also able to insert via the search API. Here is the code snippet that's trying to insert the data: ` package com.learn.openai.demo.service.config;

import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TextSplitter;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.milvus.MilvusVectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import io.milvus.client.MilvusServiceClient;
import io.milvus.param.ConnectParam;
import io.milvus.param.collection.CreateCollectionParam;
import io.milvus.param.collection.FieldType;
import io.milvus.param.collection.LoadCollectionParam;
import io.milvus.param.index.CreateIndexParam;
import io.milvus.grpc.DataType;

import java.util.List;

/**
 * Spring configuration that exposes a {@link MilvusServiceClient} bean and a
 * {@link MilvusVectorStore} bean, reading connection and collection settings from
 * {@code spring.ai.vectorstore.milvus.*} properties.
 */
@Slf4j
@Configuration
public class MilvusVectorStoreConfig {

// Milvus host, handed to the client connection builder.
@Value("${spring.ai.vectorstore.milvus.client.host}")
private String host;

// Milvus port, handed to the client connection builder.
@Value("${spring.ai.vectorstore.milvus.client.port}")
private int port;

// Endpoint URI, handed to the client connection builder alongside host and port.
@Value("${spring.ai.vectorstore.milvus.client.uri}")
private String uri;

// Access token used when connecting; do not log this value.
@Value("${spring.ai.vectorstore.milvus.client.token}")
private String token;

// Name of the collection that is checked, created, loaded and written to.
@Value("${spring.ai.vectorstore.milvus.collection-name}")
private String collectionName;

// Name of an io.milvus.param.MetricType constant; parsed with MetricType.valueOf when the index is created.
@Value("${spring.ai.vectorstore.milvus.metric-type}")
private String metricType;

/**
 * Builds the Milvus client bean from the injected connection properties.
 *
 * <p>Host, port, URI and token are all handed to the connect-parameter builder. No database
 * name is set here, so whatever default the client applies is used.
 *
 * @return a new {@link MilvusServiceClient} for the configured endpoint
 */
@Bean
public MilvusServiceClient milvusServiceClient() {
    ConnectParam.Builder connection = ConnectParam.newBuilder();
    connection.withHost(host);
    connection.withPort(port);
    connection.withUri(uri);
    connection.withToken(token);
    return new MilvusServiceClient(connection.build());
}

/**
 * Creates the Spring AI vector store bean backed by the configured Milvus collection.
 *
 * <p>Steps, in order: probe the embedding dimension, create the collection and index if the
 * collection is missing, build the store, load the collection, and finally load the configured
 * documents when a probe search returns nothing.
 *
 * <p>The store is built with the same metric type that is used when the index is created, so
 * that similarity searches issued through the store agree with the index.
 *
 * @param milvusServiceClient   client used for collection management and by the store
 * @param embeddingModel        model used to embed documents and queries
 * @param vectorStoreProperties supplies the documents to load into an empty collection
 * @return the ready-to-use vector store
 * @throws IllegalArgumentException if the configured metric type is not a valid
 *                                  {@code io.milvus.param.MetricType} constant name
 * @throws RuntimeException         if creating or loading the collection, or loading a
 *                                  document, fails
 */
@Bean
public MilvusVectorStore milvusVectorStore(MilvusServiceClient milvusServiceClient, EmbeddingModel embeddingModel,
        VectorStoreProperties vectorStoreProperties) {
    // Get the actual embedding dimension from the model
    int actualDimension = getEmbeddingDimension(embeddingModel);
    log.info("Detected embedding dimension: {}", actualDimension);

    // Create collection if it doesn't exist (the created/existing flag is not needed here)
    createCollectionIfNotExists(milvusServiceClient, actualDimension);

    MilvusVectorStore store = MilvusVectorStore.builder(milvusServiceClient, embeddingModel)
            .collectionName(collectionName)
            // Keep the store's search metric aligned with the metric the index is created with.
            .metricType(io.milvus.param.MetricType.valueOf(metricType))
            .build();

    // Load the collection into memory
    loadCollection(milvusServiceClient);

    // Check if collection has data by trying a similarity search
    boolean collectionHasData = checkCollectionHasData(store);

    if (collectionHasData) {
        log.info("Milvus collection '{}' already has data, skipping data loading", collectionName);
    } else {
        log.info("Milvus collection '{}' is empty, loading movie data...", collectionName);
        loadMovieData(store, vectorStoreProperties);
    }

    return store;
}

/**
 * Determines the embedding vector length by embedding a short probe string.
 *
 * <p>Any failure of the probe is logged and answered with a fallback of 1536.
 * NOTE(review): if the fallback does not match the real model, a collection created with it
 * would have the wrong vector dimension; confirm this best-effort behavior is intended.
 *
 * @param embeddingModel model to probe
 * @return the probe embedding's length, or 1536 when the probe fails
 */
private int getEmbeddingDimension(EmbeddingModel embeddingModel) {
    final int fallbackDimension = 1536; // Default to 1536 (common for OpenAI models)
    try {
        // The length of a probe embedding is the model's dimension.
        return embeddingModel.embed("test").length;
    } catch (Exception probeFailure) {
        log.warn("Could not detect embedding dimension from model, using default 1536", probeFailure);
        return fallbackDimension;
    }
}

/**
 * Creates the configured collection and an index on its {@code embedding} field when the
 * collection does not exist yet.
 *
 * <p>Every client call returns a response object whose status is checked here, so a failure
 * the server reports is not logged as a success.
 *
 * <p>NOTE(review): the schema uses an auto-generated Int64 {@code id} primary key with
 * {@code doc_id} as a plain VarChar field; confirm this matches the schema the vector store
 * expects for inserts.
 *
 * @param milvusServiceClient client used to issue the collection and index requests
 * @param dimension           dimension of the {@code embedding} float-vector field
 * @return {@code true} if the collection was created, {@code false} if it already existed
 * @throws RuntimeException if any request fails or reports a non-success status
 */
private boolean createCollectionIfNotExists(MilvusServiceClient milvusServiceClient, int dimension) {
    try {
        // Check if collection exists
        io.milvus.param.R<Boolean> hasCollectionResponse = milvusServiceClient.hasCollection(
            io.milvus.param.collection.HasCollectionParam.newBuilder()
                .withCollectionName(collectionName)
                .build()
        );
        ensureSuccess(hasCollectionResponse, "check existence of");

        if (Boolean.TRUE.equals(hasCollectionResponse.getData())) {
            log.info("Collection '{}' already exists", collectionName);
            return false; // Collection already existed
        }

        log.info("Creating Milvus collection: {} with dimension: {}", collectionName, dimension);

        // Create collection with all required fields for Spring AI
        CreateCollectionParam createCollectionParam = CreateCollectionParam.newBuilder()
            .withCollectionName(collectionName)
            .withDescription("Movie embeddings collection")
            .addFieldType(FieldType.newBuilder()
                .withName("id")
                .withDataType(DataType.Int64)
                .withPrimaryKey(true)
                .withAutoID(true)
                .build())
            .addFieldType(FieldType.newBuilder()
                .withName("doc_id")
                .withDataType(DataType.VarChar)
                .withMaxLength(65535)
                .build())
            .addFieldType(FieldType.newBuilder()
                .withName("content")
                .withDataType(DataType.VarChar)
                .withMaxLength(65535)
                .build())
            .addFieldType(FieldType.newBuilder()
                .withName("metadata")
                .withDataType(DataType.JSON)
                .build())
            .addFieldType(FieldType.newBuilder()
                .withName("embedding")
                .withDataType(DataType.FloatVector)
                .withDimension(dimension)
                .build())
            .build();

        ensureSuccess(milvusServiceClient.createCollection(createCollectionParam), "create");

        // Create index on embedding field
        CreateIndexParam createIndexParam = CreateIndexParam.newBuilder()
            .withCollectionName(collectionName)
            .withFieldName("embedding")
            .withIndexType(io.milvus.param.IndexType.IVF_FLAT)
            .withMetricType(io.milvus.param.MetricType.valueOf(metricType))
            .withExtraParam("{\"nlist\":1024}")
            .build();

        ensureSuccess(milvusServiceClient.createIndex(createIndexParam), "create index on");

        log.info("Successfully created collection and index: {}", collectionName);
        return true; // Collection was created

    } catch (Exception e) {
        log.error("Error creating collection: {}", e.getMessage(), e);
        throw new RuntimeException("Failed to create Milvus collection", e);
    }
}

/**
 * Throws when a Milvus response carries a non-success status.
 *
 * @param response response returned by a client call
 * @param action   verb phrase describing the attempted operation, used in the error message
 * @throws IllegalStateException if the response status is not the success code
 */
private void ensureSuccess(io.milvus.param.R<?> response, String action) {
    if (response.getStatus() != io.milvus.param.R.Status.Success.getCode()) {
        throw new IllegalStateException(
            "Milvus failed to " + action + " collection '" + collectionName + "': " + response.getMessage(),
            response.getException());
    }
}

/**
 * Asks Milvus to load the configured collection into memory.
 *
 * <p>The client call returns a response object; its status is checked so that a load the
 * server reports as failed is not logged as a success.
 *
 * @param milvusServiceClient client used to issue the load request
 * @throws RuntimeException if the request fails or reports a non-success status
 */
private void loadCollection(MilvusServiceClient milvusServiceClient) {
    try {
        log.info("Loading collection '{}' into memory...", collectionName);

        LoadCollectionParam loadCollectionParam = LoadCollectionParam.newBuilder()
            .withCollectionName(collectionName)
            .build();

        io.milvus.param.R<?> loadResponse = milvusServiceClient.loadCollection(loadCollectionParam);
        if (loadResponse.getStatus() != io.milvus.param.R.Status.Success.getCode()) {
            throw new IllegalStateException(
                "Milvus reported a failure loading collection '" + collectionName + "': "
                    + loadResponse.getMessage(),
                loadResponse.getException());
        }

        log.info("Successfully loaded collection '{}' into memory", collectionName);

    } catch (Exception e) {
        log.error("Error loading collection: {}", e.getMessage(), e);
        throw new RuntimeException("Failed to load Milvus collection", e);
    }
}

/**
 * Probes the store with a similarity search to decide whether it already holds documents.
 *
 * <p>A failed probe is still treated as "no data" so startup can continue, but it is logged
 * as a warning with the full exception, because a search error does not mean the collection
 * is empty.
 *
 * @param store vector store to probe
 * @return {@code true} if the probe search returned at least one document; {@code false} if
 *         it returned nothing or failed
 */
private boolean checkCollectionHasData(MilvusVectorStore store) {
    try {
        // Try to perform a simple similarity search to check if collection has data
        List<Document> results = store.similaritySearch("Movie");
        return results != null && !results.isEmpty();
    } catch (Exception e) {
        log.warn("Could not query collection '{}' to check for existing data; treating it as empty",
                collectionName, e);
        return false;
    }
}

/**
 * Reads, splits and adds each configured document to the vector store.
 *
 * <p>Each document is read with Tika, split into token-based chunks and added to the store,
 * followed by a one-second pause before the next document.
 *
 * @param store                 vector store receiving the split documents
 * @param vectorStoreProperties supplies the documents to load
 * @throws RuntimeException if a document cannot be loaded, or if the thread is interrupted
 *                          during the pause (the interrupt flag is restored first)
 */
private void loadMovieData(MilvusVectorStore store, VectorStoreProperties vectorStoreProperties) {
    // The splitter is built once and reused across documents instead of once per document.
    TextSplitter textSplitter = new TokenTextSplitter();

    vectorStoreProperties.getDocumentsToLoad().forEach(document -> {
        log.info("Loading document: {}", document);

        try {
            TikaDocumentReader documentReader = new TikaDocumentReader(document);
            List<Document> documents = documentReader.get();
            List<Document> splitDocs = textSplitter.apply(documents);
            if (!splitDocs.isEmpty()) {
                log.info("First document to insert: {}", splitDocs.get(0));
            }
            store.add(splitDocs);

            log.info("Added {} split documents to Milvus", splitDocs.size());

            // Rate limiting to avoid overwhelming the embedding API
            Thread.sleep(1000);

        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            log.error("Interrupted while loading document: {}", document, e);
            throw new RuntimeException("Failed to load document: " + document, e);
        } catch (Exception e) {
            log.error("Error loading document: {}", document, e);
            throw new RuntimeException("Failed to load document: " + document, e);
        }
    });

    log.info("Successfully loaded all movie data into Milvus collection: {}", collectionName);
}
} `

build.gradle dependency for the Spring AI Milvus vector store:

implementation 'org.springframework.ai:spring-ai-starter-vector-store-milvus'

I am happy to share more data as needed.

codefarm0 avatar Jul 02 '25 15:07 codefarm0