mirror of
https://github.com/langgenius/dify-docs.git
synced 2026-03-26 13:18:34 +07:00
Update the weaviate v4 migration docs and script (#616)
* updated docs and migration script * update the dify version, add the migration script link, and improve formatting --------- Co-authored-by: DhruvGorasiya <Dhruv.Gorasiya@student.csulb.edu> Co-authored-by: kurokobo <2920259+kurokobo@users.noreply.github.com> Co-authored-by: Riskey <riskey47@dify.ai>
This commit is contained in:
@@ -1,15 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration script to fix Weaviate schema incompatibility between 1.19.0 and 1.27.0+
|
||||
|
||||
This script:
|
||||
- Identifies collections with old schema (no vectorConfig)
|
||||
- Creates new collections with proper vectorConfig including "default" named vector
|
||||
- Migrates data using cursor-based pagination (efficient for large datasets)
|
||||
- Uses batch operations for fast inserts
|
||||
- Preserves all object properties and vectors
|
||||
Note:
|
||||
- This is a community-edited version of the draft of the script presented by the Dify Team.
|
||||
- This script is not officially supported by the Dify Team.
|
||||
- The original source for this script can be found at https://github.com/langgenius/dify/issues/27291#issuecomment-3501003678.
|
||||
- The changes made in this script are:
|
||||
- Retrieve Weaviate connection info from environment variables to make this script run in the Worker container.
|
||||
- Switch to cursor-based pagination in "replace_old_collection", since the migration could fail with large collections.
|
||||
- Fix an issue where both the old and new collections remained without being deleted after migrating an empty collection.
|
||||
"""
|
||||
|
||||
import os
|
||||
import weaviate
|
||||
from weaviate.classes.config import Configure, VectorDistances
|
||||
import sys
|
||||
@@ -17,62 +24,68 @@ import time
|
||||
from typing import List, Dict, Any
|
||||
|
||||
# Configuration
#
# Connection settings are read from environment variables so the script can run
# inside the Worker container; each value falls back to the default shown below.
from urllib.parse import urlparse  # only needed for the endpoint parsing in this section

# A trailing slash is stripped so that f"{WEAVIATE_ENDPOINT}/v1/..." never yields "//v1/...".
WEAVIATE_ENDPOINT = os.getenv("WEAVIATE_ENDPOINT", "http://weaviate:8080").rstrip("/")
WEAVIATE_GRPC_ENDPOINT = os.getenv("WEAVIATE_GRPC_ENDPOINT", "grpc://weaviate:50051")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih")

# Number of objects fetched and inserted per pagination step.
BATCH_SIZE = 1000


def _split_endpoint(endpoint, default_port):
    """Return ``(host, port)`` parsed from an endpoint URL.

    Accepts endpoints with or without a scheme ("http://host:8080", "host:8080"),
    with a trailing slash or path, and without an explicit port (in which case
    ``default_port`` is used).

    Raises:
        ValueError: if no host can be extracted or the port is not a valid number.
    """
    # urlparse only recognises the network location after "//", so add it for
    # scheme-less values such as "weaviate:8080".
    normalized = endpoint if "//" in endpoint else f"//{endpoint}"
    parsed = urlparse(normalized)
    if not parsed.hostname:
        raise ValueError(f"Cannot determine host from endpoint: {endpoint!r}")
    return parsed.hostname, parsed.port or default_port


WEAVIATE_HOST, WEAVIATE_PORT = _split_endpoint(WEAVIATE_ENDPOINT, 8080)
_, WEAVIATE_GRPC_PORT = _split_endpoint(WEAVIATE_GRPC_ENDPOINT, 50051)
|
||||
|
||||
|
||||
def identify_old_collections(client: weaviate.WeaviateClient) -> List[str]:
    """Return the names of collections that still need to be migrated.

    Only collections whose name starts with "Vector_index_" are inspected; a
    collection is reported as needing migration when its configuration has no
    ``vector_config``. A status line is printed for every inspected collection.

    Args:
        client: Connected Weaviate client used to list and inspect collections.

    Returns:
        Names of the collections that have the old schema.
    """
    every_collection = client.collections.list_all()
    print(f"Found {len(every_collection)} total collections")

    legacy_names = []
    for name in every_collection:
        # Anything without the Vector_index_ prefix is left untouched.
        if name.startswith("Vector_index_"):
            cfg = client.collections.get(name).config.get()
            if cfg.vector_config is not None:
                print(f" - {name}: NEW SCHEMA (skip)")
            else:
                legacy_names.append(name)
                print(f" - {name}: OLD SCHEMA (needs migration)")

    return legacy_names
|
||||
|
||||
|
||||
def get_collection_schema(
    client: weaviate.WeaviateClient, collection_name: str
) -> Dict[str, Any]:
    """Fetch the full schema of a collection via the Weaviate REST API.

    Args:
        client: Weaviate client. Not used by this function; the request goes
            straight to the REST endpoint. Kept for a uniform call signature.
        collection_name: Name of the collection (class) whose schema is requested.

    Returns:
        The decoded JSON schema returned by ``GET /v1/schema/{collection_name}``.

    Raises:
        Exception: if the server answers with any status other than 200.
    """
    import requests

    # Use the configured endpoint (like the other schema calls in this script)
    # instead of a hand-built "http://host:port" URL, so the scheme set in
    # WEAVIATE_ENDPOINT (e.g. https) is honoured.
    response = requests.get(
        f"{WEAVIATE_ENDPOINT.rstrip('/')}/v1/schema/{collection_name}",
        headers={"Authorization": f"Bearer {WEAVIATE_API_KEY}"},
    )

    if response.status_code != 200:
        raise Exception(f"Failed to get schema: {response.text}")
    return response.json()
|
||||
|
||||
|
||||
def create_new_collection(client: weaviate.WeaviateClient, old_name: str, schema: Dict[str, Any]) -> str:
|
||||
def create_new_collection(
|
||||
client: weaviate.WeaviateClient, old_name: str, schema: Dict[str, Any]
|
||||
) -> str:
|
||||
"""Create a new collection with updated schema using REST API"""
|
||||
import requests
|
||||
|
||||
|
||||
# Generate new collection name
|
||||
new_name = f"{old_name}_migrated"
|
||||
|
||||
|
||||
print(f"Creating new collection: {new_name}")
|
||||
|
||||
|
||||
# Build new schema with proper vectorConfig
|
||||
# Note: When using vectorConfig (named vectors), we don't set class-level vectorizer
|
||||
new_schema = {
|
||||
@@ -81,91 +94,88 @@ def create_new_collection(client: weaviate.WeaviateClient, old_name: str, schema
|
||||
# Do NOT set class-level vectorizer when using vectorConfig
|
||||
"vectorConfig": {
|
||||
"default": {
|
||||
"vectorizer": {
|
||||
"none": {}
|
||||
},
|
||||
"vectorizer": {"none": {}},
|
||||
"vectorIndexType": "hnsw",
|
||||
"vectorIndexConfig": {
|
||||
"distance": "cosine",
|
||||
"ef": -1,
|
||||
"efConstruction": 128,
|
||||
"maxConnections": 32
|
||||
}
|
||||
"maxConnections": 32,
|
||||
},
|
||||
}
|
||||
},
|
||||
"properties": []
|
||||
"properties": [],
|
||||
}
|
||||
|
||||
|
||||
# Copy properties from old schema
|
||||
if "properties" in schema:
|
||||
new_schema["properties"] = schema["properties"]
|
||||
|
||||
|
||||
# Create collection via REST API
|
||||
response = requests.post(
|
||||
f"http://{WEAVIATE_HOST}:{WEAVIATE_PORT}/v1/schema",
|
||||
f"{WEAVIATE_ENDPOINT}/v1/schema",
|
||||
json=new_schema,
|
||||
headers={"Authorization": f"Bearer {WEAVIATE_API_KEY}"}
|
||||
headers={"Authorization": f"Bearer {WEAVIATE_API_KEY}"},
|
||||
)
|
||||
|
||||
|
||||
if response.status_code not in [200, 201]:
|
||||
raise Exception(f"Failed to create collection: {response.text}")
|
||||
|
||||
|
||||
print(f" Created new collection: {new_name}")
|
||||
return new_name
|
||||
|
||||
|
||||
def migrate_collection_data(
|
||||
client: weaviate.WeaviateClient,
|
||||
old_collection_name: str,
|
||||
new_collection_name: str
|
||||
client: weaviate.WeaviateClient, old_collection_name: str, new_collection_name: str
|
||||
) -> int:
|
||||
"""Migrate data from old collection to new collection using cursor-based pagination"""
|
||||
|
||||
|
||||
old_collection = client.collections.get(old_collection_name)
|
||||
new_collection = client.collections.get(new_collection_name)
|
||||
|
||||
|
||||
total_migrated = 0
|
||||
cursor = None
|
||||
|
||||
|
||||
print(f"Migrating data from {old_collection_name} to {new_collection_name}")
|
||||
|
||||
|
||||
while True:
|
||||
# Fetch batch of objects using cursor-based pagination
|
||||
if cursor is None:
|
||||
# First batch
|
||||
response = old_collection.query.fetch_objects(
|
||||
limit=BATCH_SIZE,
|
||||
include_vector=True
|
||||
limit=BATCH_SIZE, include_vector=True
|
||||
)
|
||||
else:
|
||||
# Subsequent batches using cursor
|
||||
response = old_collection.query.fetch_objects(
|
||||
limit=BATCH_SIZE,
|
||||
include_vector=True,
|
||||
after=cursor
|
||||
limit=BATCH_SIZE, include_vector=True, after=cursor
|
||||
)
|
||||
|
||||
|
||||
objects = response.objects
|
||||
|
||||
|
||||
if not objects:
|
||||
break
|
||||
|
||||
|
||||
# Use batch insert for efficiency
|
||||
with new_collection.batch.dynamic() as batch:
|
||||
for obj in objects:
|
||||
# Prepare properties
|
||||
properties = obj.properties
|
||||
|
||||
|
||||
# Add object with vector
|
||||
batch.add_object(
|
||||
properties=properties,
|
||||
vector=obj.vector["default"] if isinstance(obj.vector, dict) else obj.vector,
|
||||
uuid=obj.uuid
|
||||
vector=(
|
||||
obj.vector["default"]
|
||||
if isinstance(obj.vector, dict)
|
||||
else obj.vector
|
||||
),
|
||||
uuid=obj.uuid,
|
||||
)
|
||||
|
||||
|
||||
total_migrated += len(objects)
|
||||
print(f" Migrated {total_migrated} objects...")
|
||||
|
||||
|
||||
# Update cursor for next iteration
|
||||
if len(objects) < BATCH_SIZE:
|
||||
# Last batch
|
||||
@@ -173,36 +183,34 @@ def migrate_collection_data(
|
||||
else:
|
||||
# Get the last object's UUID for cursor
|
||||
cursor = objects[-1].uuid
|
||||
|
||||
|
||||
print(f" Total migrated: {total_migrated} objects")
|
||||
return total_migrated
|
||||
|
||||
|
||||
def verify_migration(
|
||||
client: weaviate.WeaviateClient,
|
||||
old_collection_name: str,
|
||||
new_collection_name: str
|
||||
client: weaviate.WeaviateClient, old_collection_name: str, new_collection_name: str
|
||||
):
|
||||
"""Verify that the migration was successful"""
|
||||
|
||||
|
||||
old_collection = client.collections.get(old_collection_name)
|
||||
new_collection = client.collections.get(new_collection_name)
|
||||
|
||||
|
||||
# Count objects in both collections
|
||||
old_count_response = old_collection.query.fetch_objects(limit=1)
|
||||
new_count_response = new_collection.query.fetch_objects(limit=1)
|
||||
|
||||
|
||||
# Get aggregation for accurate counts
|
||||
old_agg = old_collection.aggregate.over_all(total_count=True)
|
||||
new_agg = new_collection.aggregate.over_all(total_count=True)
|
||||
|
||||
|
||||
old_count = old_agg.total_count
|
||||
new_count = new_agg.total_count
|
||||
|
||||
|
||||
print(f"\nVerification:")
|
||||
print(f" Old collection ({old_collection_name}): {old_count} objects")
|
||||
print(f" New collection ({new_collection_name}): {new_count} objects")
|
||||
|
||||
|
||||
if old_count == new_count:
|
||||
print(f" Status: SUCCESS - Counts match!")
|
||||
return True
|
||||
@@ -212,109 +220,132 @@ def verify_migration(
|
||||
|
||||
|
||||
def replace_old_collection(
    client: weaviate.WeaviateClient, old_collection_name: str, new_collection_name: str
):
    """Replace the old collection with the migrated one under the original name.

    The old collection is deleted, a collection with the original name is
    recreated from the migrated collection's schema, all objects are copied
    over with cursor-based pagination, and the temporary migrated collection
    is removed once the copy has been confirmed complete.

    Args:
        client: Connected Weaviate client used for reading and writing objects.
        old_collection_name: Original collection name that must survive the swap.
        new_collection_name: Temporary collection that holds the migrated data.

    Returns:
        True when the replacement completed.

    Raises:
        Exception: if the migrated schema cannot be read, the collection cannot
            be recreated, or the number of copied objects does not match the
            migrated collection. In all of these cases the temporary migrated
            collection is left in place so no data is lost.
    """
    import requests

    auth_headers = {"Authorization": f"Bearer {WEAVIATE_API_KEY}"}

    print(f"\nReplacing old collection with migrated data...")

    # Step 1: Delete old collection
    print(f" Step 1: Deleting old collection...")
    response = requests.delete(
        f"{WEAVIATE_ENDPOINT}/v1/schema/{old_collection_name}",
        headers=auth_headers,
    )
    if response.status_code != 200:
        # Best effort: if the collection is really still there, Step 3 fails loudly.
        print(f" Warning: Could not delete old collection: {response.text}")
    else:
        print(f" Deleted")

    # Step 2: Get schema from migrated collection
    print(f" Step 2: Getting schema from migrated collection...")
    schema_response = requests.get(
        f"{WEAVIATE_ENDPOINT}/v1/schema/{new_collection_name}",
        headers=auth_headers,
    )
    if schema_response.status_code != 200:
        # Without this check an error payload would be posted back as a schema.
        raise Exception(f"Failed to get schema: {schema_response.text}")
    schema = schema_response.json()
    schema["class"] = old_collection_name

    # Step 3: Create collection with original name and new schema
    print(f" Step 3: Creating collection with original name...")
    create_response = requests.post(
        f"{WEAVIATE_ENDPOINT}/v1/schema",
        json=schema,
        headers=auth_headers,
    )
    if create_response.status_code not in [200, 201]:
        raise Exception(f"Failed to create collection: {create_response.text}")
    print(f" Created")

    # Step 4: Copy data to collection with original name using cursor-based pagination
    print(f" Step 4: Copying data to original collection name...")
    migrated_collection = client.collections.get(new_collection_name)
    new_collection = client.collections.get(old_collection_name)

    total_copied = 0
    cursor = None  # None requests the first page; afterwards the last seen UUID

    while True:
        response = migrated_collection.query.fetch_objects(
            include_vector=True, limit=BATCH_SIZE, after=cursor
        )
        objects = response.objects

        if not objects:
            break

        # Use batch insert for efficiency
        with new_collection.batch.dynamic() as batch:
            for obj in objects:
                batch.add_object(
                    properties=obj.properties, vector=obj.vector, uuid=obj.uuid
                )

        total_copied += len(objects)
        print(f" Copied {total_copied} objects...")

        # A short page means the source collection is exhausted.
        if len(objects) < BATCH_SIZE:
            break
        cursor = objects[-1].uuid

    print(f" Total copied: {total_copied} objects")

    # Confirm the copy really landed before the only remaining copy of the
    # data (the temporary collection) is deleted.
    source_count = migrated_collection.aggregate.over_all(total_count=True).total_count
    target_count = new_collection.aggregate.over_all(total_count=True).total_count
    if source_count != target_count:
        raise Exception(
            f"Copy incomplete: {new_collection_name} has {source_count} objects but "
            f"{old_collection_name} has {target_count}; "
            f"keeping {new_collection_name} so no data is lost"
        )

    # Step 5: Delete the temporary migrated collection
    print(f" Step 5: Cleaning up temporary migrated collection...")
    response = requests.delete(
        f"{WEAVIATE_ENDPOINT}/v1/schema/{new_collection_name}",
        headers=auth_headers,
    )
    if response.status_code == 200:
        print(f" Cleaned up")
    else:
        # The data is already in place; a leftover temporary collection is harmless.
        print(f" Warning: Could not delete temporary collection: {response.text}")

    print(
        f"\n SUCCESS! {old_collection_name} now has the new schema with {total_copied} objects"
    )
    return True
|
||||
|
||||
|
||||
def migrate_all_collections():
|
||||
"""Main migration function"""
|
||||
|
||||
|
||||
print("=" * 80)
|
||||
print("Weaviate Collection Migration Script")
|
||||
print("Migrating from Weaviate 1.19.0 schema to 1.27.0+ schema")
|
||||
print("=" * 80)
|
||||
print()
|
||||
|
||||
|
||||
client = weaviate.connect_to_local(
|
||||
host=WEAVIATE_HOST,
|
||||
port=WEAVIATE_PORT,
|
||||
grpc_port=WEAVIATE_GRPC_PORT,
|
||||
auth_credentials=weaviate.auth.AuthApiKey(WEAVIATE_API_KEY)
|
||||
auth_credentials=weaviate.auth.AuthApiKey(WEAVIATE_API_KEY),
|
||||
)
|
||||
|
||||
|
||||
try:
|
||||
# Step 1: Identify collections that need migration
|
||||
print("Step 1: Identifying collections that need migration...")
|
||||
collections_to_migrate = identify_old_collections(client)
|
||||
|
||||
|
||||
if not collections_to_migrate:
|
||||
print("\nNo collections need migration. All collections are up to date!")
|
||||
return
|
||||
|
||||
|
||||
print(f"\nFound {len(collections_to_migrate)} collections to migrate:")
|
||||
for col in collections_to_migrate:
|
||||
print(f" - {col}")
|
||||
|
||||
|
||||
# Confirm before proceeding
|
||||
print("\nThis script will:")
|
||||
print("1. Create new collections with updated schema")
|
||||
@@ -322,54 +353,58 @@ def migrate_all_collections():
|
||||
print("3. Verify the migration")
|
||||
print("4. Optionally rename collections to activate the new ones")
|
||||
print()
|
||||
|
||||
|
||||
# Step 2: Migrate each collection
|
||||
for collection_name in collections_to_migrate:
|
||||
print("\n" + "=" * 80)
|
||||
print(f"Migrating: {collection_name}")
|
||||
print("=" * 80)
|
||||
|
||||
|
||||
try:
|
||||
# Get old schema
|
||||
schema = get_collection_schema(client, collection_name)
|
||||
|
||||
|
||||
# Create new collection
|
||||
new_collection_name = create_new_collection(client, collection_name, schema)
|
||||
|
||||
new_collection_name = create_new_collection(
|
||||
client, collection_name, schema
|
||||
)
|
||||
|
||||
# Migrate data
|
||||
migrated_count = migrate_collection_data(client, collection_name, new_collection_name)
|
||||
|
||||
migrated_count = migrate_collection_data(
|
||||
client, collection_name, new_collection_name
|
||||
)
|
||||
|
||||
# Verify migration
|
||||
success = verify_migration(client, collection_name, new_collection_name)
|
||||
|
||||
if success and migrated_count > 0:
|
||||
|
||||
if success:
|
||||
print(f"\nMigration successful for {collection_name}!")
|
||||
print(f"New collection: {new_collection_name}")
|
||||
|
||||
|
||||
# Automatically replace old collection with migrated one
|
||||
try:
|
||||
replace_old_collection(client, collection_name, new_collection_name)
|
||||
replace_old_collection(
|
||||
client, collection_name, new_collection_name
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"\nWarning: Could not automatically replace collection: {e}")
|
||||
print(
|
||||
f"\nWarning: Could not automatically replace collection: {e}"
|
||||
)
|
||||
print(f"\nTo activate manually:")
|
||||
print(f"1. Delete the old collection: {collection_name}")
|
||||
print(f"2. Rename {new_collection_name} to {collection_name}")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"\nError migrating {collection_name}: {e}")
|
||||
print(f"Skipping this collection and continuing...")
|
||||
continue
|
||||
|
||||
|
||||
print("\n" + "=" * 80)
|
||||
print("Migration Complete!")
|
||||
print("=" * 80)
|
||||
print("\nSummary:")
|
||||
print(f" Collections migrated: {len(collections_to_migrate)}")
|
||||
print(f"\nNext steps:")
|
||||
print(f"1. Test the new collections (*_migrated)")
|
||||
print(f"2. If everything works, delete or backup the old collections")
|
||||
print(f"3. Rename the new collections to remove '_migrated' suffix")
|
||||
|
||||
|
||||
finally:
|
||||
client.close()
|
||||
|
||||
@@ -383,6 +418,6 @@ if __name__ == "__main__":
|
||||
except Exception as e:
|
||||
print(f"\n\nFatal error: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@@ -6,20 +6,22 @@ title: Weaviate Migration Guide upgrading to Client v4 and Server 1.27+
|
||||
|
||||
## Overview
|
||||
|
||||
Starting with **Dify v1.9.2**, the weaviate-client has been upgraded from v3 to v4.17.0. This upgrade brings significant performance improvements and better stability, but requires **Weaviate server version 1.27.0 or higher**.
|
||||
Starting with **Dify v1.9.2**, the weaviate-client has been upgraded from v3 to v4.17.0. This upgrade brings significant performance improvements and better stability, but requires **Weaviate server version 1.27.0 or higher**.
|
||||
|
||||
<Warning>
|
||||
**BREAKING CHANGE:** The new weaviate-client v4 is NOT backward compatible with Weaviate server versions below 1.27.0. If you are running a self-hosted Weaviate instance on version 1.19.0 or older, you must upgrade your Weaviate server before upgrading Dify.
|
||||
**BREAKING CHANGE**: The new weaviate-client v4 is NOT backward compatible with Weaviate server versions below 1.27.0. If you are running a self-hosted Weaviate instance on version 1.19.0 or older, you must upgrade your Weaviate server before upgrading Dify.
|
||||
</Warning>
|
||||
|
||||
### Who Is Affected?
|
||||
|
||||
This migration affects:
|
||||
|
||||
- Self-hosted Dify users running their own Weaviate instances on versions below 1.27.0
|
||||
- Users currently on Weaviate server version 1.19.0-1.26.x
|
||||
- Users upgrading to Dify versions with weaviate-client v4
|
||||
|
||||
**Not affected:**
|
||||
**Not affected**:
|
||||
|
||||
- Cloud-hosted Weaviate users (Weaviate Cloud manages the server version)
|
||||
- Users already on Weaviate 1.27.0+ can upgrade Dify without additional steps
|
||||
- Users running Dify's default Docker Compose setup (Weaviate version is updated automatically)
|
||||
@@ -30,31 +32,31 @@ This migration affects:
|
||||
|
||||
The weaviate-client v4 introduces several breaking changes:
|
||||
|
||||
1. **Minimum Server Version:** Requires Weaviate server 1.27.0 or higher
|
||||
2. **API Changes:** New import structure (`weaviate.classes` instead of `weaviate.client`)
|
||||
3. **gRPC Support:** Uses gRPC by default on port 50051 for improved performance
|
||||
4. **Authentication Changes:** Updated authentication methods and configuration
|
||||
1. **Minimum Server Version**: Requires Weaviate server 1.27.0 or higher
|
||||
2. **API Changes**: New import structure (`weaviate.classes` instead of `weaviate.client`)
|
||||
3. **gRPC Support**: Uses gRPC by default on port 50051 for improved performance
|
||||
4. **Authentication Changes**: Updated authentication methods and configuration
|
||||
|
||||
### Why Upgrade?
|
||||
|
||||
- **Performance:** Significantly faster query and import operations via gRPC (50051)
|
||||
- **Stability:** Better connection handling and error recovery
|
||||
- **Future Compatibility:** Access to latest Weaviate features and ongoing support
|
||||
- **Security:** Weaviate 1.19.0 is over a year old and no longer receives security updates
|
||||
- **Performance**: Significantly faster query and import operations via gRPC (50051)
|
||||
- **Stability**: Better connection handling and error recovery
|
||||
- **Future Compatibility**: Access to latest Weaviate features and ongoing support
|
||||
- **Security**: Weaviate 1.19.0 is over a year old and no longer receives security updates
|
||||
|
||||
## Version Compatibility Matrix
|
||||
|
||||
| Dify Version | Weaviate-client Version | Compatible Weaviate Server Versions |
|
||||
|--------------|-------------------------|-------------------------------------|
|
||||
| ------------ | ----------------------- | ----------------------------------- |
|
||||
| ≤ 1.9.1 | v3.x | 1.19.0 - 1.26.x |
|
||||
| ≥ 1.9.2 | v4.17.0 | 1.27.0+ (tested up to 1.33.1) |
|
||||
| ≥ 1.9.2 | v4.17.0 | 1.27.0+ (tested up to 1.33.1) |
|
||||
|
||||
<Info>
|
||||
This migration applies to any Dify version using weaviate-client v4.17.0 or higher.
|
||||
This migration applies to any Dify version using weaviate-client v4.17.0 or higher.
|
||||
</Info>
|
||||
|
||||
<Info>
|
||||
Weaviate server version 1.19.0 was released over a year ago and is now outdated. Upgrading to 1.27.0+ provides access to numerous improvements in performance, stability, and features.
|
||||
Weaviate server version 1.19.0 was released over a year ago and is now outdated. Upgrading to 1.27.0+ provides access to numerous improvements in performance, stability, and features.
|
||||
</Info>
|
||||
|
||||
## Prerequisites
|
||||
@@ -62,17 +64,21 @@ Weaviate server version 1.19.0 was released over a year ago and is now outdated.
|
||||
Before starting the migration, complete these steps:
|
||||
|
||||
1. **Check Your Current Weaviate Version**
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/meta
|
||||
```
|
||||
|
||||
Look for the `version` field in the response.
|
||||
|
||||
2. **Backup Your Data**
|
||||
|
||||
- Create a complete backup of your Weaviate data
|
||||
- Backup your Docker volumes if using Docker Compose
|
||||
- Document your current configuration settings
|
||||
|
||||
3. **Review System Requirements**
|
||||
|
||||
- Ensure sufficient disk space for database migration
|
||||
- Verify network connectivity between Dify and Weaviate
|
||||
- Confirm gRPC port (50051) is accessible if using external Weaviate
|
||||
@@ -88,17 +94,17 @@ Choose the migration path that matches your deployment setup and current Weaviat
|
||||
|
||||
### Choose Your Path
|
||||
|
||||
- **Path A – Migration with Backup (from 1.19):** Recommended if you are still on Weaviate 1.19. You will create a backup, upgrade to 1.27+, repair any orphaned data, and then migrate the schema.
|
||||
- **Path B – Direct Recovery (already on 1.27+):** Use this if you already upgraded to 1.27+ and your knowledge bases stopped working. This path focuses on repairing the data layout and running the schema migration.
|
||||
- **Path A – Migration with Backup (from 1.19)**: Recommended if you are still on Weaviate 1.19. You will create a backup, upgrade to 1.27+, repair any orphaned data, and then migrate the schema.
|
||||
- **Path B – Direct Recovery (already on 1.27+)**: Use this if you already upgraded to 1.27+ and your knowledge bases stopped working. This path focuses on repairing the data layout and running the schema migration.
|
||||
|
||||
<Warning>
|
||||
Do **not** attempt to downgrade back to 1.19. The schema format is incompatible and will lead to data loss.
|
||||
Do **not** attempt to downgrade back to 1.19. The schema format is incompatible and will lead to data loss.
|
||||
</Warning>
|
||||
|
||||
### Path A: Migration with Backup (From 1.19)
|
||||
|
||||
<Info>
|
||||
Safest path. Creates a backup before upgrading so you can restore if anything goes wrong.
|
||||
Safest path. Creates a backup before upgrading so you can restore if anything goes wrong.
|
||||
</Info>
|
||||
|
||||
#### Prerequisites
|
||||
@@ -112,18 +118,18 @@ Safest path. Creates a backup before upgrading so you can restore if anything go
|
||||
Edit `docker/docker-compose.yaml` so the `weaviate` service includes backup configuration:
|
||||
|
||||
```yaml
|
||||
weaviate:
|
||||
image: semitechnologies/weaviate:1.19.0
|
||||
volumes:
|
||||
- ./volumes/weaviate:/var/lib/weaviate
|
||||
- ./volumes/weaviate_backups:/var/lib/weaviate/backups
|
||||
ports:
|
||||
- "8080:8080"
|
||||
- "50051:50051"
|
||||
environment:
|
||||
ENABLE_MODULES: backup-filesystem
|
||||
BACKUP_FILESYSTEM_PATH: /var/lib/weaviate/backups
|
||||
# ... rest of your environment variables
|
||||
weaviate:
|
||||
image: semitechnologies/weaviate:1.19.0
|
||||
volumes:
|
||||
- ./volumes/weaviate:/var/lib/weaviate
|
||||
- ./volumes/weaviate_backups:/var/lib/weaviate/backups
|
||||
ports:
|
||||
- "8080:8080"
|
||||
- "50051:50051"
|
||||
environment:
|
||||
ENABLE_MODULES: backup-filesystem
|
||||
BACKUP_FILESYSTEM_PATH: /var/lib/weaviate/backups
|
||||
# ... rest of your environment variables
|
||||
```
|
||||
|
||||
Restart Weaviate to apply the change:
|
||||
@@ -137,7 +143,7 @@ sleep 10
|
||||
|
||||
#### Step A2: Create a Backup
|
||||
|
||||
1. **List your collections:**
|
||||
1. **List your collections**:
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer <WEAVIATE_API_KEY>" \
|
||||
@@ -151,7 +157,7 @@ sleep 10
|
||||
"
|
||||
```
|
||||
|
||||
2. **Trigger the backup:** include specific collection names if you prefer.
|
||||
2. **Trigger the backup**: include specific collection names if you prefer.
|
||||
|
||||
```bash
|
||||
curl -X POST \
|
||||
@@ -164,7 +170,7 @@ sleep 10
|
||||
}'
|
||||
```
|
||||
|
||||
3. **Check backup status:**
|
||||
3. **Check backup status**:
|
||||
|
||||
```bash
|
||||
sleep 5
|
||||
@@ -173,7 +179,7 @@ sleep 10
|
||||
python3 -m json.tool | grep status
|
||||
```
|
||||
|
||||
4. **Verify backup files exist:**
|
||||
4. **Verify backup files exist**:
|
||||
|
||||
```bash
|
||||
ls -lh docker/volumes/weaviate_backups/kb-backup/
|
||||
@@ -181,7 +187,7 @@ sleep 10
|
||||
|
||||
#### Step A3: Upgrade to Weaviate 1.27+
|
||||
|
||||
1. **Upgrade Dify to a version that ships Weaviate 1.27+:**
|
||||
1. **Upgrade Dify to a version that ships Weaviate 1.27+**:
|
||||
|
||||
```bash
|
||||
cd /path/to/dify
|
||||
@@ -189,13 +195,13 @@ sleep 10
|
||||
git checkout main # or a tagged release that includes the upgrade
|
||||
```
|
||||
|
||||
2. **Confirm the new Weaviate image:**
|
||||
2. **Confirm the new Weaviate image**:
|
||||
|
||||
```bash
|
||||
grep "image: semitechnologies/weaviate" docker/docker-compose.yaml
|
||||
```
|
||||
|
||||
3. **Restart with the new version:**
|
||||
3. **Restart with the new version**:
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
@@ -206,6 +212,10 @@ sleep 10
|
||||
|
||||
#### Step A4: Fix Orphaned LSM Data (if present)
|
||||
|
||||
You can fix orphaned LSM data either from the host or inside the container:
|
||||
|
||||
**Option A: From host (if volumes are mounted)**:
|
||||
|
||||
```bash
|
||||
cd docker/volumes/weaviate
|
||||
|
||||
@@ -226,6 +236,32 @@ docker compose restart weaviate
|
||||
sleep 15
|
||||
```
|
||||
|
||||
**Option B: Inside Weaviate container (recommended)**:
|
||||
|
||||
```bash
|
||||
cd /path/to/dify/docker
|
||||
docker compose exec -it weaviate /bin/sh
|
||||
|
||||
# Inside container
|
||||
cd /var/lib/weaviate
|
||||
for dir in vector_index_*_node_*_lsm; do
|
||||
[ -d "$dir" ] || continue
|
||||
|
||||
index_id=$(echo "$dir" | sed -n 's/vector_index_\([^_]*_[^_]*_[^_]*_[^_]*_[^_]*\)_node_.*/\1/p')
|
||||
shard_id=$(echo "$dir" | sed -n 's/.*_node_\([^_]*\)_lsm/\1/p')
|
||||
|
||||
mkdir -p "vector_index_${index_id}_node/$shard_id/lsm"
|
||||
cp -a "$dir/"* "vector_index_${index_id}_node/$shard_id/lsm/"
|
||||
|
||||
echo "✓ Copied $dir"
|
||||
done
|
||||
exit
|
||||
|
||||
# Restart Weaviate
|
||||
docker compose restart weaviate
|
||||
sleep 15
|
||||
```
|
||||
|
||||
#### Step A5: Migrate the Schema
|
||||
|
||||
1. **Install dependencies** (in a temporary virtualenv is fine):
|
||||
@@ -237,13 +273,35 @@ sleep 15
|
||||
pip install weaviate-client requests
|
||||
```
|
||||
|
||||
2. **Run the [migration script](https://github.com/langgenius/dify-docs/blob/main/assets/migrate_weaviate_collections.py):**
|
||||
2. **Run the [migration script](https://github.com/langgenius/dify-docs/blob/main/assets/migrate_weaviate_collections.py)** either locally or inside the Worker container.\
|
||||
**Option A: Run locally (if you have Python 3.11+ and dependencies installed)**:
|
||||
|
||||
```bash
|
||||
python3 migrate_weaviate_collections.py
|
||||
```
|
||||
|
||||
3. **Restart Dify services:**
|
||||
**Option B: Run inside Worker container (recommended for Docker setups)**:
|
||||
|
||||
```bash
|
||||
# Copy script to storage directory
|
||||
cp migrate_weaviate_collections.py /path/to/dify/docker/volumes/app/storage/
|
||||
|
||||
# Enter worker container
|
||||
cd /path/to/dify/docker
|
||||
docker compose exec -it worker /bin/bash
|
||||
|
||||
# Run migration script (use --no-cache for Dify 1.11.0+)
|
||||
uv run --no-cache /app/api/storage/migrate_weaviate_collections.py
|
||||
|
||||
# Exit container
|
||||
exit
|
||||
```
|
||||
|
||||
<Info>
|
||||
The migration script uses environment variables for configuration, making it suitable for running inside Docker containers. For Dify 1.11.0+, if you encounter permission errors with `uv`, use `uv run --no-cache` instead.
|
||||
</Info>
|
||||
|
||||
3. **Restart Dify services**:
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
@@ -251,16 +309,20 @@ sleep 15
|
||||
sleep 15
|
||||
```
|
||||
|
||||
4. **Verify in the UI:** open Dify, test retrieval against your migrated knowledge bases.
|
||||
4. **Verify in the UI**: open Dify, test retrieval against your migrated knowledge bases.
|
||||
|
||||
<Warning>
|
||||
For large collections (over 10,000 objects), verify that the object count matches between old and new collections. The migration script will display verification counts automatically.
|
||||
</Warning>
|
||||
|
||||
<Info>
|
||||
After confirming a healthy migration, you can delete `weaviate_migration_env` and the backup files to reclaim disk space.
|
||||
After confirming a healthy migration, you can delete `weaviate_migration_env` and the backup files to reclaim disk space.
|
||||
</Info>
|
||||
|
||||
### Path B: Direct Recovery (Already on 1.27+)
|
||||
|
||||
<Warning>
|
||||
Only use this path if you already upgraded to 1.27+ and your knowledge bases stopped working. You cannot create a 1.19 backup anymore, so you must repair the data in place.
|
||||
Only use this path if you already upgraded to 1.27+ and your knowledge bases stopped working. You cannot create a 1.19 backup anymore, so you must repair the data in place.
|
||||
</Warning>
|
||||
|
||||
#### Prerequisites
|
||||
@@ -271,10 +333,13 @@ Only use this path if you already upgraded to 1.27+ and your knowledge bases sto
|
||||
|
||||
#### Step B1: Repair Orphaned LSM Data
|
||||
|
||||
Stop Weaviate and fix orphaned LSM data:
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
cd /path/to/dify/docker
|
||||
docker compose stop weaviate
|
||||
|
||||
# Option A: From host (if volumes are mounted)
|
||||
cd volumes/weaviate
|
||||
|
||||
for dir in vector_index_*_node_*_lsm; do
|
||||
@@ -288,12 +353,24 @@ for dir in vector_index_*_node_*_lsm; do
|
||||
|
||||
echo "✓ Copied $dir"
|
||||
done
|
||||
|
||||
# Option B: Inside container (recommended)
|
||||
docker compose run --rm --entrypoint /bin/sh weaviate -c "
|
||||
cd /var/lib/weaviate
|
||||
for dir in vector_index_*_node_*_lsm; do
|
||||
[ -d \"\$dir\" ] || continue
|
||||
index_id=\$(echo \"\$dir\" | sed -n 's/vector_index_\([^_]*_[^_]*_[^_]*_[^_]*_[^_]*\)_node_.*/\1/p')
|
||||
shard_id=\$(echo \"\$dir\" | sed -n 's/.*_node_\([^_]*\)_lsm/\1/p')
|
||||
mkdir -p \"vector_index_\${index_id}_node/\$shard_id/lsm\"
|
||||
cp -a \"\$dir/\"* \"vector_index_\${index_id}_node/\$shard_id/lsm/\"
|
||||
echo \"✓ Copied \$dir\"
|
||||
done
|
||||
"
|
||||
```
|
||||
|
||||
Restart Weaviate:
|
||||
|
||||
```bash
|
||||
cd ../..
|
||||
docker compose start weaviate
|
||||
sleep 15
|
||||
```
|
||||
@@ -316,7 +393,30 @@ curl -s -H "Authorization: Bearer <WEAVIATE_API_KEY>" \
|
||||
|
||||
#### Step B2: Run the Schema Migration
|
||||
|
||||
Follow the same commands as [Step A5](#step-a5:-migrate-the-schema). Create the virtualenv if needed, install `weaviate-client` 4.x, run `migrate_weaviate_collections.py`, then restart `api`, `worker`, and `worker_beat`.
|
||||
Follow the same commands as [Step A5](#step-a5%3A-migrate-the-schema). You can run the script locally or inside the Worker container:
|
||||
|
||||
**To run inside Worker container**:
|
||||
|
||||
```bash
|
||||
# Copy script to storage directory
|
||||
cp migrate_weaviate_collections.py /path/to/dify/docker/volumes/app/storage/
|
||||
|
||||
# Enter worker container
|
||||
cd /path/to/dify/docker
|
||||
docker compose exec -it worker /bin/bash
|
||||
|
||||
# Run migration script
|
||||
uv run --no-cache /app/api/storage/migrate_weaviate_collections.py
|
||||
|
||||
# Exit and restart services
|
||||
exit
|
||||
docker compose restart api worker worker_beat
|
||||
```
|
||||
|
||||
<Info>
|
||||
The migration script uses cursor-based pagination to safely handle large
|
||||
collections. Verify object counts match after migration completes.
|
||||
</Info>
|
||||
|
||||
#### Step B3: Verify in Dify
|
||||
|
||||
@@ -327,22 +427,24 @@ Follow the same commands as [Step A5](#step-a5:-migrate-the-schema). Create the
|
||||
## Data Migration for Legacy Versions
|
||||
|
||||
<Warning>
|
||||
### CRITICAL: Data Migration Required
|
||||
**CRITICAL: Data Migration Required**
|
||||
|
||||
**Your existing knowledge bases will NOT work after upgrade without migration!**
|
||||
|
||||
### Why Migration is Needed:
|
||||
**Why Migration is Needed**:
|
||||
|
||||
- Old data: Created with Weaviate v3 client (simple schema)
|
||||
- New code: Requires Weaviate v4 format (extended schema)
|
||||
- **Incompatible**: Old data is missing required properties
|
||||
|
||||
### Migration Options:
|
||||
**Migration Options**:
|
||||
|
||||
##### Option A: Use Weaviate Backup/Restore
|
||||
- Option A: Use Weaviate Backup/Restore
|
||||
|
||||
##### Option B: Re-index from Original Documents
|
||||
- Option B: Re-index from Original Documents
|
||||
|
||||
- Option C: Keep Old Weaviate (Don't Upgrade Yet) — if you can't afford downtime or data loss.
|
||||
|
||||
##### Option C: Keep Old Weaviate (Don't Upgrade Yet) — if you can't afford downtime or data loss.
|
||||
</Warning>
|
||||
|
||||
### Automatic Migration
|
||||
@@ -379,7 +481,7 @@ curl -X POST "http://localhost:8080/v1/backups/filesystem/pre-migration-backup/r
|
||||
```
|
||||
|
||||
<Info>
|
||||
For comprehensive migration guidance, especially for complex schemas or large datasets, refer to the official [Weaviate Migration Guide](https://weaviate.io/developers/weaviate/installation/migration).
|
||||
For comprehensive migration guidance, especially for complex schemas or large datasets, refer to the official [Weaviate Migration Guide](https://weaviate.io/developers/weaviate/installation/migration).
|
||||
</Info>
|
||||
|
||||
## Configuration Changes
|
||||
@@ -390,15 +492,17 @@ The following new environment variable is available in Dify versions with weavia
|
||||
|
||||
#### WEAVIATE_GRPC_ENDPOINT
|
||||
|
||||
**Description:** Specifies the gRPC endpoint for Weaviate connections. Using gRPC significantly improves performance for batch operations and queries.
|
||||
**Description**: Specifies the gRPC endpoint for Weaviate connections. Using gRPC significantly improves performance for batch operations and queries.
|
||||
|
||||
**Format:** `hostname:port` (NO protocol prefix)
|
||||
**Format**: `hostname:port` (NO protocol prefix)
|
||||
|
||||
**Default Ports**:
|
||||
|
||||
**Default Ports:**
|
||||
- Insecure: 50051
|
||||
- Secure (TLS): 443
|
||||
|
||||
**Examples:**
|
||||
**Examples**:
|
||||
|
||||
```bash
|
||||
# Docker Compose (internal network)
|
||||
WEAVIATE_GRPC_ENDPOINT=weaviate:50051
|
||||
@@ -414,17 +518,17 @@ WEAVIATE_GRPC_ENDPOINT=your-instance.weaviate.cloud:443
|
||||
```
|
||||
|
||||
<Warning>
|
||||
Do NOT include protocol prefixes like `grpc://` or `http://` in the WEAVIATE_GRPC_ENDPOINT value. Use only `hostname:port`.
|
||||
Do NOT include protocol prefixes like `grpc://` or `http://` in the WEAVIATE_GRPC_ENDPOINT value. Use only `hostname:port`.
|
||||
</Warning>
|
||||
|
||||
### Updated Environment Variables
|
||||
|
||||
All existing Weaviate environment variables remain the same:
|
||||
|
||||
- **WEAVIATE_ENDPOINT:** HTTP endpoint for Weaviate (e.g., `http://weaviate:8080`)
|
||||
- **WEAVIATE_API_KEY:** API key for authentication (if enabled)
|
||||
- **WEAVIATE_BATCH_SIZE:** Batch size for imports (default: 100)
|
||||
- **WEAVIATE_GRPC_ENABLED:** Enable/disable gRPC (default: true in v4)
|
||||
- **WEAVIATE_ENDPOINT**: HTTP endpoint for Weaviate (e.g., `http://weaviate:8080`)
|
||||
- **WEAVIATE_API_KEY**: API key for authentication (if enabled)
|
||||
- **WEAVIATE_BATCH_SIZE**: Batch size for imports (default: 100)
|
||||
- **WEAVIATE_GRPC_ENABLED**: Enable/disable gRPC (default: true in v4)
|
||||
|
||||
### Complete Configuration Example
|
||||
|
||||
@@ -446,8 +550,6 @@ WEAVIATE_GRPC_ENDPOINT=weaviate:50051
|
||||
WEAVIATE_BATCH_SIZE=100
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Verification Steps
|
||||
|
||||
After completing the migration, verify everything is working correctly:
|
||||
@@ -483,7 +585,7 @@ Look for messages indicating successful connection without "No module named 'wea
|
||||
6. Check that status changes from "QUEUING" → "INDEXING" → "AVAILABLE"
|
||||
|
||||
<Info>
|
||||
If documents get stuck in "QUEUING" status, check that the Celery worker is running: `docker compose logs worker`
|
||||
If documents get stuck in "QUEUING" status, check that the Celery worker is running: `docker compose logs worker`.
|
||||
</Info>
|
||||
|
||||
### 4. Test Vector Search
|
||||
@@ -506,16 +608,17 @@ docker compose logs -f api | grep -i "query_time\|duration"
|
||||
```
|
||||
|
||||
<Info>
|
||||
With gRPC properly configured, vector search queries should be 2-5x faster compared to HTTP-only connections.
|
||||
With gRPC properly configured, vector search queries should be 2-5x faster compared to HTTP-only connections.
|
||||
</Info>
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Issue: "No module named 'weaviate.classes'"
|
||||
|
||||
**Cause:** The weaviate-client v4 is not installed, or v3 is still being used.
|
||||
**Cause**: The weaviate-client v4 is not installed, or v3 is still being used.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# For Docker installations, ensure you're running the correct Dify version
|
||||
docker compose pull
|
||||
@@ -529,63 +632,67 @@ pip install weaviate-client==4.17.0
|
||||
|
||||
### Issue: Connection Refused on gRPC Port (50051)
|
||||
|
||||
**Cause:** Port 50051 is not exposed, not accessible, or Weaviate is not listening on it.
|
||||
**Cause**: Port 50051 is not exposed, not accessible, or Weaviate is not listening on it.
|
||||
|
||||
**Solution:**
|
||||
**Solution**:
|
||||
|
||||
1. **For Docker Compose users with bundled Weaviate:**
|
||||
1. **For Docker Compose users with bundled Weaviate**:
|
||||
The port is available internally between containers. No action needed unless you're connecting from outside Docker.
|
||||
|
||||
2. **For external Weaviate:**
|
||||
2. **For external Weaviate**:
|
||||
|
||||
```bash
|
||||
# Check if Weaviate is listening on 50051
|
||||
docker ps | grep weaviate
|
||||
# Look for "0.0.0.0:50051->50051/tcp"
|
||||
|
||||
|
||||
# If not exposed, restart with port mapping
|
||||
docker run -p 8080:8080 -p 50051:50051 ...
|
||||
```
|
||||
|
||||
3. **Check firewall rules:**
|
||||
3. **Check firewall rules**:
|
||||
|
||||
```bash
|
||||
# Linux
|
||||
sudo ufw allow 50051/tcp
|
||||
|
||||
|
||||
# Check if port is listening
|
||||
netstat -tlnp | grep 50051
|
||||
```
|
||||
|
||||
### Issue: Authentication Errors (401 Unauthorized)
|
||||
|
||||
**Cause:** API key mismatch or authentication configuration issue.
|
||||
**Cause**: API key mismatch or authentication configuration issue.
|
||||
|
||||
**Solution:**
|
||||
**Solution**:
|
||||
|
||||
1. Verify API key matches in both Weaviate and Dify:
|
||||
|
||||
```bash
|
||||
# Check Weaviate authentication
|
||||
curl http://localhost:8080/v1/meta | jq '.authentication'
|
||||
|
||||
|
||||
# Check Dify configuration
|
||||
docker compose exec api env | grep WEAVIATE_API_KEY
|
||||
```
|
||||
|
||||
2. If using anonymous access:
|
||||
|
||||
```yaml
|
||||
# Weaviate docker-compose.yaml
|
||||
weaviate:
|
||||
environment:
|
||||
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
|
||||
AUTHENTICATION_APIKEY_ENABLED: 'false'
|
||||
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
|
||||
AUTHENTICATION_APIKEY_ENABLED: "false"
|
||||
```
|
||||
|
||||
|
||||
Then remove `WEAVIATE_API_KEY` from Dify configuration.
|
||||
|
||||
### Issue: Documents Stuck in "QUEUING" Status
|
||||
|
||||
**Cause:** Celery worker not running or not connected to Redis.
|
||||
**Cause**: Celery worker not running or not connected to Redis.
|
||||
|
||||
**Solution:**
|
||||
**Solution**:
|
||||
|
||||
```bash
|
||||
# Check if worker is running
|
||||
@@ -604,22 +711,25 @@ docker compose restart worker
|
||||
|
||||
### Issue: Slow Performance After Migration
|
||||
|
||||
**Cause:** gRPC not enabled or configured incorrectly.
|
||||
**Cause**: gRPC not enabled or configured incorrectly.
|
||||
|
||||
**Solution:**
|
||||
**Solution**:
|
||||
|
||||
1. Verify gRPC configuration:
|
||||
|
||||
```bash
|
||||
docker compose exec api env | grep WEAVIATE_GRPC
|
||||
```
|
||||
|
||||
|
||||
Should show:
|
||||
|
||||
```
|
||||
WEAVIATE_GRPC_ENABLED=true
|
||||
WEAVIATE_GRPC_ENDPOINT=weaviate:50051
|
||||
```
|
||||
|
||||
2. Test gRPC connectivity:
|
||||
|
||||
```bash
|
||||
docker exec -it dify-api-1 nc -zv weaviate 50051
|
||||
# Should return "succeeded"
|
||||
@@ -629,21 +739,24 @@ docker compose restart worker
|
||||
|
||||
### Issue: Schema Migration Errors
|
||||
|
||||
**Cause:** Incompatible schema changes between Weaviate versions or corrupted data.
|
||||
**Cause**: Incompatible schema changes between Weaviate versions or corrupted data.
|
||||
|
||||
**Solution:**
|
||||
**Solution**:
|
||||
|
||||
1. Check Weaviate logs for specific error messages:
|
||||
|
||||
```bash
|
||||
docker compose logs weaviate | tail -100
|
||||
```
|
||||
|
||||
2. List current schema:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/schema
|
||||
```
|
||||
|
||||
3. If necessary, delete corrupted collections (⚠️ this deletes all data):
|
||||
|
||||
```bash
|
||||
# Backup first!
|
||||
curl -X DELETE http://localhost:8080/v1/schema/YourCollectionName
|
||||
@@ -655,14 +768,15 @@ docker compose restart worker
|
||||
```
|
||||
|
||||
<Warning>
|
||||
Deleting collections removes all data. Only do this if you have a backup and are prepared to re-index all content.
|
||||
Deleting collections removes all data. Only do this if you have a backup and are prepared to re-index all content.
|
||||
</Warning>
|
||||
|
||||
### Issue: Docker Volume Permission Errors
|
||||
|
||||
**Cause:** User ID mismatch in Docker containers.
|
||||
**Cause**: User ID mismatch in Docker containers.
|
||||
|
||||
**Solution**:
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Check ownership of Weaviate data directory
|
||||
ls -la docker/volumes/weaviate/
|
||||
@@ -674,6 +788,21 @@ sudo chown -R 1000:1000 docker/volumes/weaviate/
|
||||
docker compose restart weaviate
|
||||
```
|
||||
|
||||
### Issue: Permission Denied When Running Migration Script (Dify 1.11.0+)
|
||||
|
||||
**Cause**: The `/home/dify` directory may not exist in newer Dify versions, causing `uv` cache creation to fail.
|
||||
|
||||
**Solution**:
|
||||
|
||||
```bash
|
||||
# Option 1: Use --no-cache flag (recommended)
|
||||
uv run --no-cache migrate_weaviate_collections.py
|
||||
|
||||
# Option 2: Run as root user
|
||||
docker compose exec -u root worker /bin/bash
|
||||
uv run migrate_weaviate_collections.py
|
||||
```
|
||||
|
||||
## Rollback Plan
|
||||
|
||||
If the migration fails and you need to rollback:
|
||||
@@ -724,7 +853,7 @@ docker compose logs | grep -i error
|
||||
```
|
||||
|
||||
<Info>
|
||||
Always test the rollback procedure in a staging environment first if possible. Maintain multiple backup copies before attempting major migrations.
|
||||
Always test the rollback procedure in a staging environment first if possible. Maintain multiple backup copies before attempting major migrations.
|
||||
</Info>
|
||||
|
||||
## Additional Resources
|
||||
@@ -753,16 +882,16 @@ Always test the rollback procedure in a staging environment first if possible. M
|
||||
|
||||
This migration brings important improvements to Dify's vector storage capabilities:
|
||||
|
||||
**Better Performance:** gRPC support dramatically improves query and import speeds (2-5x faster)
|
||||
- **Better Performance**: gRPC support dramatically improves query and import speeds (2-5x faster)
|
||||
|
||||
**Improved Stability:** Enhanced connection handling and error recovery
|
||||
- **Improved Stability**: Enhanced connection handling and error recovery
|
||||
|
||||
**Security:** Access to security updates and patches not available in Weaviate 1.19.0
|
||||
- **Security**: Access to security updates and patches not available in Weaviate 1.19.0
|
||||
|
||||
**Future-Proof:** Access to latest Weaviate features and ongoing support
|
||||
- **Future-Proof**: Access to latest Weaviate features and ongoing support
|
||||
|
||||
While this is a breaking change requiring a server upgrade for users on old versions, the benefits significantly outweigh the migration effort. Most Docker Compose users can complete the migration in under 15 minutes with the automatic update.
|
||||
|
||||
<Info>
|
||||
If you encounter any issues not covered in this guide, please report them on the [Dify GitHub Issues page](https://github.com/langgenius/dify/issues) with the labels "weaviate" and "migration".
|
||||
</Info>
|
||||
If you encounter any issues not covered in this guide, please report them on the [Dify GitHub Issues page](https://github.com/langgenius/dify/issues) with the labels "weaviate" and "migration".
|
||||
</Info>
|
||||
|
||||
Reference in New Issue
Block a user