Implement search and reranking functionality with FTS and embedding support

- Added BaseReranker abstract class for defining reranking interfaces.
- Implemented FastEmbedReranker using fastembed's TextCrossEncoder for scoring document-query pairs.
- Introduced FTSEngine for full-text search capabilities using SQLite FTS5.
- Developed SearchPipeline to integrate embedding, binary search, ANN indexing, FTS, and reranking.
- Added fusion methods for combining results from different search strategies using Reciprocal Rank Fusion.
- Created unit and integration tests for the new search and reranking components.
- Established configuration management for search parameters and models.
This commit is contained in:
catlog22
2026-03-16 23:03:17 +08:00
parent 5a4b18d9b1
commit de4158597b
41 changed files with 2655 additions and 1848 deletions

View File

@@ -0,0 +1,36 @@
# PEP 517/518 build configuration: the package is built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Core distribution metadata (PEP 621).
[project]
name = "codex-lens-v2"
version = "0.1.0"
description = "Minimal code semantic search library with 2-stage pipeline"
requires-python = ">=3.10"
# Intentionally empty: the base install has no mandatory third-party
# requirements. Every external package is opt-in through an extra declared
# under [project.optional-dependencies].
dependencies = []
# Opt-in feature sets, installed as extras, e.g.
#   pip install "codex-lens-v2[semantic,reranker-api]"
[project.optional-dependencies]
# Local embedding / ANN stack (fastembed models, hnswlib index, numpy arrays).
semantic = [
    "fastembed>=0.4.0,<2.0",
    "hnswlib>=0.8.0",
    "numpy>=1.26",
]
# NOTE(review): fastembed appears to depend on the CPU `onnxruntime` wheel;
# installing `onnxruntime-gpu` next to it can leave two conflicting ONNX
# Runtime builds in one environment. Confirm that `semantic` + `gpu` resolves
# to a working GPU provider, or consider `fastembed-gpu` instead.
gpu = [
    "onnxruntime-gpu>=1.16",
]
faiss-cpu = [
    "faiss-cpu>=1.7.4",
]
# The PyPI `faiss-gpu` project stopped publishing at 1.7.2, so a `>=1.7.4`
# floor can never be satisfied and the extra would be uninstallable. Newer GPU
# builds are published under CUDA-specific names (`faiss-gpu-cu11`,
# `faiss-gpu-cu12`); switch to one of those if a newer FAISS is required.
faiss-gpu = [
    "faiss-gpu>=1.7.2",
]
# HTTP client for the extra named `reranker-api`.
reranker-api = [
    "httpx>=0.25",
]
# Test tooling for local development and CI.
dev = [
    "pytest>=7.0",
    "pytest-cov",
]
# Explicit wheel contents. The distribution name (codex-lens-v2) differs from
# the package directory (src/codexlens), so the path is spelled out here rather
# than left to Hatch's automatic package discovery.
[tool.hatch.build.targets.wheel]
packages = ["src/codexlens"]