Vector Database Setup Script for Milvus: Data Import and Export
I need a script template to handle data import and export in Milvus. Can you provide a complete example?
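This template provides a basic structure for importing and exporting data in Milvus, as two separate scripts: the first connects, creates the collection if it does not exist, and inserts sample vectors; the second queries the collection and exports the rows to a CSV file. Adapt it to your specific needs.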
from pymilvus import connections, Collection, utility
import numpy as np
# Milvus connection parameters
host = 'localhost'
port = '19530'
collection_name = 'my_collection'

# Connect to Milvus
connections.connect(host=host, port=port)

# Check if the collection exists; create it (schema + vector index) if it doesn't
if not utility.has_collection(collection_name):
    # Schema classes are only needed on the creation path, so import them here.
    from pymilvus import FieldSchema, CollectionSchema, DataType

    fields = [
        # Caller-supplied INT64 primary key (auto_id is disabled).
        FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=False),
        # 128-dimensional float vector; inserted embeddings must use the same dim.
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, dim=128),
    ]
    schema = CollectionSchema(fields=fields, description='My collection')
    collection = Collection(name=collection_name, schema=schema)

    # Index parameters use the string form shown in the pymilvus create_index
    # documentation ('L2', 'IVF_FLAT') instead of MetricType/IndexType enum
    # members, which avoids depending on those enums being importable.
    index_params = {
        'metric_type': 'L2',
        'index_type': 'IVF_FLAT',
        'params': {'nlist': 1024},
    }
    collection.create_index(field_name='embedding', index_params=index_params)
else:
    # Collection already exists: bind to it without touching schema or index.
    collection = Collection(collection_name)
# Generate sample data (replace with your actual data loading)
def generate_data(num_vectors, dim):
    """Build placeholder rows for insertion (replace with real data loading).

    Args:
        num_vectors: Number of rows to generate.
        dim: Dimensionality of each embedding vector.

    Returns:
        A two-element list ``[ids, embeddings]`` in column order: ``ids`` is
        an int64 array ``0 .. num_vectors - 1`` and ``embeddings`` is a
        float32 array of shape ``(num_vectors, dim)`` filled with random
        values.
    """
    # Pin int64 explicitly: the default integer dtype of np.arange is
    # platform-dependent, while the primary-key field is declared as INT64.
    ids = np.arange(num_vectors, dtype=np.int64)
    embeddings = np.random.rand(num_vectors, dim).astype(np.float32)
    return [ids, embeddings]
num_vectors_to_insert = 1000
# The second argument must equal the `dim` of the collection's embedding field.
data_to_insert = generate_data(num_vectors_to_insert, 128)

# Insert data; keep the result so the report below reflects what the server
# acknowledged rather than what was requested.
insert_result = collection.insert(data_to_insert)

# Flush data to make it searchable
collection.flush()

# Load the collection into memory
collection.load()

print(f"Inserted {insert_result.insert_count} vectors into {collection_name}.")
from pymilvus import connections, Collection
import pandas as pd
# Connection settings for the Milvus server
host = 'localhost'
port = '19530'
collection_name = 'my_collection'

# Open the connection
connections.connect(port=port, host=host)

# Bind to the existing collection and load it before querying
collection = Collection(collection_name)
collection.load()

# Fetch every row whose id is non-negative in a single query.
# NOTE: the whole result is held in memory, so be mindful with large collections.
export_fields = ['id', 'embedding']
data = collection.query(
    expr='id >= 0',
    output_fields=export_fields,
)

# Wrap the query result in a DataFrame
# (presumably one record per entity - verify against your pymilvus version).
df = pd.DataFrame(data=data)

# Write the frame out as CSV without the index column
# (swap in any other format you prefer).
df.to_csv('milvus_data_export.csv', index=False)
print(f"Exported {len(df)} rows to milvus_data_export.csv")

# Remember to disconnect if needed (optional)
# connections.disconnect("default")
Prerequisites: pymilvus and pandas must be installed (pip install pymilvus pandas).
Know the answer? Log in to help.
Login to Answer