Implement SPLADE sparse encoder and associated database migrations

- Added `splade_encoder.py` for ONNX-optimized SPLADE encoding, including methods for encoding text and batch processing.
- Created `SPLADE_IMPLEMENTATION.md` to document the SPLADE encoder's functionality, design patterns, and integration points.
- Introduced migration script `migration_009_add_splade.py` to add SPLADE metadata and posting list tables to the database.
- Developed `splade_index.py` for managing the SPLADE inverted index, supporting efficient sparse vector retrieval.
- Added verification script `verify_watcher.py` to test FileWatcher event filtering and debouncing functionality.
This commit is contained in:
catlog22
2026-01-01 17:41:22 +08:00
parent 520f2d26f2
commit 5bb01755bc
16 changed files with 3122 additions and 2792 deletions

View File

@@ -80,6 +80,18 @@ reranker = [
"transformers>=4.36",
]
# SPLADE sparse retrieval (CPU): Hugging Face `transformers` plus
# `optimum` with its `onnxruntime` extra for the ONNX-backed encoder.
splade = [
    "transformers>=4.36",
    "optimum[onnxruntime]>=1.16",
]
# SPLADE sparse retrieval with GPU acceleration (CUDA): same stack as the
# `splade` extra, but `optimum` is requested with `onnxruntime-gpu`.
# NOTE(review): the CPU and GPU onnxruntime builds are generally not meant
# to be installed side by side — confirm users pick only one of
# `splade` / `splade-gpu`.
splade-gpu = [
    "transformers>=4.36",
    "optimum[onnxruntime-gpu]>=1.16",
]
# Encoding detection for non-UTF8 files
encoding = [
"chardet>=5.0",