qdrant-client
qdrant-client copied to clipboard
Cant upload bulk data to local qdrant client.
def __init__(self, dataset_path="movies_metadata.csv", sample_size=1000):
# Connect to Qdrant Cloud
self.client = QdrantClient(
url=os.getenv("QDRANT_URL_LOCAL"),
timeout=300
)
self.model = SentenceTransformer('all-MiniLM-L6-v2', device='mps') # Adjust device if needed
self.collection_name = "movies_large"
# Load and preprocess movie dataset
self._load_dataset(dataset_path, sample_size)
self._initialize_collection()
def _load_dataset(self, dataset_path, sample_size=1000):
"""Load and sample movie data from CSV."""
def _initialize_collection(self):
"""Create or reset the collection and upload data."""
if self.client.collection_exists(self.collection_name):
self.client.delete_collection(self.collection_name)
print(f"Collection '{self.collection_name}' deleted.")
self.client.create_collection(
collection_name=self.collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
optimizers_config=models.OptimizersConfigDiff(
indexing_threshold=0,
),
shard_number=2,
)
points = [
PointStruct(
id=idx,
vector=vector,
payload={"title": movie["title"], "description": movie["description"]}
)
for idx, (vector, movie) in enumerate(zip(self.vectors, self.movies))
]
self.client.upsert(
collection_name=self.collection_name,
points=points,
)
print(f"Collection '{self.collection_name}' initialized with {len(points)} points.")
When I uploaded 1,000 points, it worked fine. But with 10,000 points, it gets stuck and doesn’t send the data to the client. I checked the bulk upload documentation but didn’t find anything helpful. My device has plenty of free RAM, so I don’t think it’s a memory issue.
hi @RaihanulHaque
it's better to split your data in chunks
you can do it manually and write a loop with upsert on your own or use convenience methods like upload_collection or upload_points