Implement SPLADE sparse encoder and associated database migrations

- Added `splade_encoder.py` for ONNX-optimized SPLADE encoding, including methods for encoding text and batch processing.
- Created `SPLADE_IMPLEMENTATION.md` to document the SPLADE encoder's functionality, design patterns, and integration points.
- Introduced migration script `migration_009_add_splade.py` to add SPLADE metadata and posting list tables to the database.
- Developed `splade_index.py` for managing the SPLADE inverted index, supporting efficient sparse vector retrieval.
- Added verification script `verify_watcher.py` to test FileWatcher event filtering and debouncing functionality.
2026-03-30 20:21:09 +08:00 · 2026-01-01 17:41:22 +08:00
parent 520f2d26f2
commit 5bb01755bc
16 changed files with 3122 additions and 2792 deletions
--- a/codex-lens/pyproject.toml
+++ b/codex-lens/pyproject.toml
@@ -80,6 +80,18 @@ reranker = [
    "transformers>=4.36",
 ]

+# SPLADE sparse retrieval: transformers plus optimum with its onnxruntime extra
+splade = [
+    "transformers>=4.36",
+    "optimum[onnxruntime]>=1.16",
+]
+
+# SPLADE with GPU acceleration (CUDA): same pins as `splade`, but optimum's onnxruntime-gpu extra
+splade-gpu = [
+    "transformers>=4.36",
+    "optimum[onnxruntime-gpu]>=1.16",
+]
+
 # Encoding detection for non-UTF8 files
 encoding = [
    "chardet>=5.0",