feat: Enhance embedding management and model configuration

- Updated embedding_manager.py to include backend parameter in model configuration.
- Modified model_manager.py to utilize cache_name for ONNX models.
- Refactored hybrid_search.py to improve embedder initialization based on backend type.
- Added backend column to vector_store.py for better model configuration management.
- Implemented migration for existing database to include backend information.
- Enhanced API settings implementation with comprehensive provider and endpoint management.
- Introduced LiteLLM integration guide detailing configuration and usage.
- Added examples for LiteLLM usage in TypeScript.
This commit is contained in:
catlog22
2025-12-24 14:03:59 +08:00
parent 9b926d1a1e
commit b00113d212
22 changed files with 5507 additions and 706 deletions

View File

@@ -309,7 +309,7 @@ def generate_embeddings(
# Set/update model configuration for this index
vector_store.set_model_config(
- model_profile, embedder.model_name, embedder.embedding_dim
+ model_profile, embedder.model_name, embedder.embedding_dim, backend=embedding_backend
)
# Use bulk insert mode for efficient batch ANN index building
# This defers ANN updates until end_bulk_insert() is called

View File

@@ -107,8 +107,9 @@ def _get_model_cache_path(cache_dir: Path, info: Dict) -> Path:
Path to the model cache directory
"""
# HuggingFace Hub naming: models--{org}--{model}
- model_name = info["model_name"]
- sanitized_name = f"models--{model_name.replace('/', '--')}"
+ # Use cache_name if available (for mapped ONNX models), else model_name
+ target_name = info.get("cache_name", info["model_name"])
+ sanitized_name = f"models--{target_name.replace('/', '--')}"
return cache_dir / sanitized_name