feat: Enhance embedding management and model configuration

- Updated embedding_manager.py to include a backend parameter in the model configuration.
- Modified model_manager.py to use cache_name for ONNX models.
- Refactored hybrid_search.py to initialize the embedder based on backend type.
- Added a backend column to vector_store.py for better model-configuration management.
- Implemented a migration that adds backend information to existing databases.
- Enhanced the API settings implementation with comprehensive provider and endpoint management.
- Introduced a LiteLLM integration guide detailing configuration and usage.
- Added examples for LiteLLM usage in TypeScript (see the sketch after this list).
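A minimal sketch of the TypeScript LiteLLM usage the last bullet refers to, assuming a LiteLLM proxy exposing an OpenAI-compatible /chat/completions endpoint; the base URL, API key, model id, and helper name below are illustrative assumptions, not the repository's actual example code.

// Illustrative only: the proxy URL, key, and model id are assumptions.
interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}

async function chatViaLiteLLM(
  baseUrl: string,
  apiKey: string,
  model: string,
  messages: ChatMessage[]
): Promise<string> {
  // LiteLLM proxies speak the OpenAI wire format, so a plain fetch suffices.
  const res = await fetch(`${baseUrl}/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`
    },
    body: JSON.stringify({ model, messages })
  });
  if (!res.ok) {
    throw new Error(`LiteLLM request failed: ${res.status}`);
  }
  const data = await res.json();
  return data.choices[0].message.content;
}

// Example: route a prompt to DeepSeek through the proxy (hypothetical endpoint).
// await chatViaLiteLLM('http://localhost:4000', 'sk-...', 'deepseek-chat',
//   [{ role: 'user', content: 'Summarize the backend migration.' }]);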
Author: catlog22
Date: 2025-12-24 14:03:59 +08:00
Parent: 9b926d1a1e
Commit: b00113d212
22 changed files with 5507 additions and 706 deletions


@@ -25,10 +25,33 @@ export interface ModelInfo {
}
/**
* Predefined models for each provider
* Embedding model information metadata
*/
export interface EmbeddingModelInfo {
/** Model identifier (used in API calls) */
id: string;
/** Human-readable display name */
name: string;
/** Embedding dimensions */
dimensions: number;
/** Maximum input tokens */
maxTokens: number;
/** Provider identifier */
provider: string;
}
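// Illustrative value (not part of this diff), matching the
// text-embedding-3-small entry defined in EMBEDDING_MODELS below:
// const smallEmbed: EmbeddingModelInfo = {
//   id: 'text-embedding-3-small',
//   name: 'Text Embedding 3 Small',
//   dimensions: 1536,
//   maxTokens: 8191,
//   provider: 'openai'
// };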
/**
* Predefined models for each API format
* Used for UI selection and validation
* Note: Most providers use OpenAI-compatible format
*/
export const PROVIDER_MODELS: Record<ProviderType, ModelInfo[]> = {
// OpenAI-compatible format (used by OpenAI, DeepSeek, Ollama, etc.)
openai: [
{
id: 'gpt-4o',
@@ -49,19 +72,32 @@ export const PROVIDER_MODELS: Record<ProviderType, ModelInfo[]> = {
supportsCaching: true
},
{
id: 'o1-mini',
name: 'O1 Mini',
contextWindow: 128000,
supportsCaching: true
id: 'deepseek-chat',
name: 'DeepSeek Chat',
contextWindow: 64000,
supportsCaching: false
},
{
id: 'gpt-4-turbo',
name: 'GPT-4 Turbo',
id: 'deepseek-coder',
name: 'DeepSeek Coder',
contextWindow: 64000,
supportsCaching: false
},
{
id: 'llama3.2',
name: 'Llama 3.2',
contextWindow: 128000,
supportsCaching: false
},
{
id: 'qwen2.5-coder',
name: 'Qwen 2.5 Coder',
contextWindow: 32000,
supportsCaching: false
}
],
// Anthropic format
anthropic: [
{
id: 'claude-sonnet-4-20250514',
@@ -89,135 +125,7 @@ export const PROVIDER_MODELS: Record<ProviderType, ModelInfo[]> = {
}
],
ollama: [
{
id: 'llama3.2',
name: 'Llama 3.2',
contextWindow: 128000,
supportsCaching: false
},
{
id: 'llama3.1',
name: 'Llama 3.1',
contextWindow: 128000,
supportsCaching: false
},
{
id: 'qwen2.5-coder',
name: 'Qwen 2.5 Coder',
contextWindow: 32000,
supportsCaching: false
},
{
id: 'codellama',
name: 'Code Llama',
contextWindow: 16000,
supportsCaching: false
},
{
id: 'mistral',
name: 'Mistral',
contextWindow: 32000,
supportsCaching: false
}
],
azure: [
{
id: 'gpt-4o',
name: 'GPT-4o (Azure)',
contextWindow: 128000,
supportsCaching: true
},
{
id: 'gpt-4o-mini',
name: 'GPT-4o Mini (Azure)',
contextWindow: 128000,
supportsCaching: true
},
{
id: 'gpt-4-turbo',
name: 'GPT-4 Turbo (Azure)',
contextWindow: 128000,
supportsCaching: false
},
{
id: 'gpt-35-turbo',
name: 'GPT-3.5 Turbo (Azure)',
contextWindow: 16000,
supportsCaching: false
}
],
google: [
{
id: 'gemini-2.0-flash-exp',
name: 'Gemini 2.0 Flash Experimental',
contextWindow: 1048576,
supportsCaching: true
},
{
id: 'gemini-1.5-pro',
name: 'Gemini 1.5 Pro',
contextWindow: 2097152,
supportsCaching: true
},
{
id: 'gemini-1.5-flash',
name: 'Gemini 1.5 Flash',
contextWindow: 1048576,
supportsCaching: true
},
{
id: 'gemini-1.0-pro',
name: 'Gemini 1.0 Pro',
contextWindow: 32000,
supportsCaching: false
}
],
mistral: [
{
id: 'mistral-large-latest',
name: 'Mistral Large',
contextWindow: 128000,
supportsCaching: false
},
{
id: 'mistral-medium-latest',
name: 'Mistral Medium',
contextWindow: 32000,
supportsCaching: false
},
{
id: 'mistral-small-latest',
name: 'Mistral Small',
contextWindow: 32000,
supportsCaching: false
},
{
id: 'codestral-latest',
name: 'Codestral',
contextWindow: 32000,
supportsCaching: false
}
],
deepseek: [
{
id: 'deepseek-chat',
name: 'DeepSeek Chat',
contextWindow: 64000,
supportsCaching: false
},
{
id: 'deepseek-coder',
name: 'DeepSeek Coder',
contextWindow: 64000,
supportsCaching: false
}
],
// Custom format
custom: [
{
id: 'custom-model',
@@ -237,6 +145,61 @@ export function getModelsForProvider(providerType: ProviderType): ModelInfo[] {
return PROVIDER_MODELS[providerType] || [];
}
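// Usage sketch (illustrative, not part of this diff): populate a model picker
// for the selected API format.
// const chatModels = getModelsForProvider('openai');
// chatModels.map(m => m.id); // e.g. ['gpt-4o', ..., 'deepseek-chat', 'llama3.2', 'qwen2.5-coder']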
/**
* Predefined embedding models for each API format
* Used for UI selection and validation
*/
export const EMBEDDING_MODELS: Record<ProviderType, EmbeddingModelInfo[]> = {
// OpenAI embedding models
openai: [
{
id: 'text-embedding-3-small',
name: 'Text Embedding 3 Small',
dimensions: 1536,
maxTokens: 8191,
provider: 'openai'
},
{
id: 'text-embedding-3-large',
name: 'Text Embedding 3 Large',
dimensions: 3072,
maxTokens: 8191,
provider: 'openai'
},
{
id: 'text-embedding-ada-002',
name: 'Ada 002',
dimensions: 1536,
maxTokens: 8191,
provider: 'openai'
}
],
// Anthropic doesn't have embedding models
anthropic: [],
// Custom embedding models
custom: [
{
id: 'custom-embedding',
name: 'Custom Embedding',
dimensions: 1536,
maxTokens: 8192,
provider: 'custom'
}
]
};
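// Illustrative lookup (not part of this diff): the dimensions field is what
// backend/migration logic in the vector store would need to validate against.
// const dims = EMBEDDING_MODELS.openai
//   .find(m => m.id === 'text-embedding-3-large')?.dimensions; // 3072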
/**
* Get embedding models for a specific provider
* @param providerType - Provider type to get embedding models for
* @returns Array of embedding model information
*/
export function getEmbeddingModelsForProvider(providerType: ProviderType): EmbeddingModelInfo[] {
return EMBEDDING_MODELS[providerType] || [];
}
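// Usage sketch (illustrative, not part of this diff): fall back when a provider
// has no embedding models (anthropic returns an empty array).
// const models = getEmbeddingModelsForProvider('anthropic');
// const chosen = models[0] ?? getEmbeddingModelsForProvider('openai')[0];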
/**
* Get model information by ID within a provider
* @param providerType - Provider type