feat: add APIEmbedder for remote embedding with multi-endpoint support

- Introduced APIEmbedder class to handle embeddings via a remote HTTP API.
- Implemented token packing to optimize batch sizes based on token limits.
- Added support for multiple API endpoints with round-robin dispatching.
- Included retry logic for API calls with exponential backoff on failures.
- Enhanced indexing pipeline with file exclusion checks and smart chunking strategies.
- Updated tests to cover new APIEmbedder functionality and ensure robustness.
This commit is contained in:
catlog22
2026-03-17 17:17:24 +08:00
parent 34749d2fad
commit f37189dc64
18 changed files with 1633 additions and 476 deletions

View File

@@ -290,41 +290,31 @@
"envGroup": {
"embedding": "Embedding",
"reranker": "Reranker",
"concurrency": "Concurrency",
"cascade": "Cascade Search",
"indexing": "Indexing",
"chunking": "Chunking"
"search": "Search Pipeline",
"indexing": "Indexing"
},
"envField": {
"backend": "Backend",
"model": "Model",
"autoEmbedMissing": "Auto Build Missing Vectors",
"useGpu": "Use GPU",
"highAvailability": "High Availability",
"loadBalanceStrategy": "Load Balance Strategy",
"rateLimitCooldown": "Rate Limit Cooldown",
"enabled": "Enabled",
"localModel": "Local Model",
"apiUrl": "API URL",
"apiKey": "API Key",
"multiEndpoints": "Multi-Endpoint",
"embedDim": "Embed Dimension",
"apiConcurrency": "Concurrency",
"maxTokensPerBatch": "Max Tokens/Batch",
"useGpu": "Device",
"topKResults": "Top K Results",
"maxWorkers": "Max Workers",
"batchSize": "Batch Size",
"dynamicBatchSize": "Dynamic Batch Size",
"batchSizeUtilization": "Utilization Factor",
"batchSizeMax": "Max Batch Size",
"charsPerToken": "Chars Per Token",
"searchStrategy": "Search Strategy",
"coarseK": "Coarse K",
"fineK": "Fine K",
"stagedStage2Mode": "Stage-2 Mode",
"stagedClusteringStrategy": "Clustering Strategy",
"stagedClusteringMinSize": "Cluster Min Size",
"enableStagedRerank": "Enable Rerank",
"useAstGrep": "Use ast-grep",
"staticGraphEnabled": "Static Graph",
"staticGraphRelationshipTypes": "Relationship Types",
"stripComments": "Strip Comments",
"stripDocstrings": "Strip Docstrings",
"testFilePenalty": "Test File Penalty",
"docstringWeight": "Docstring Weight"
"binaryTopK": "Binary Top K",
"annTopK": "ANN Top K",
"ftsTopK": "FTS Top K",
"fusionK": "Fusion K",
"codeAwareChunking": "Code-Aware Chunking",
"indexWorkers": "Index Workers",
"maxFileSize": "Max File Size (bytes)",
"hnswEf": "HNSW ef",
"hnswM": "HNSW M"
},
"install": {
"title": "Install CodexLens",