feat: Implement adaptive RRF weights and query intent detection

- Added integration tests for adaptive RRF weights in hybrid search.
- Enhanced query intent detection with new classifications: keyword, semantic, and mixed.
- Introduced symbol boosting in search results based on explicit symbol matches.
- Implemented embedding-based reranking with configurable options.
- Added global symbol index for efficient symbol lookups across projects.
- Improved file deletion handling on Windows to avoid permission errors.
- Updated chunk configuration to increase overlap for better context.
- Modified package.json test script to target specific test files.
- Created comprehensive writing style guidelines for documentation.
- Added TypeScript tests for query intent detection and adaptive weights.
- Established performance benchmarks for global symbol indexing.
This commit is contained in:
catlog22
2025-12-26 15:08:47 +08:00
parent ecd5085e51
commit 4061ae48c4
29 changed files with 2685 additions and 828 deletions

View File

@@ -3,6 +3,7 @@
import pytest
import sqlite3
import tempfile
import time
from pathlib import Path
from codexlens.search.hybrid_search import HybridSearchEngine
@@ -16,6 +17,22 @@ except ImportError:
SEMANTIC_DEPS_AVAILABLE = False
def _safe_unlink(path: Path, retries: int = 5, delay_s: float = 0.05) -> None:
    """Best-effort unlink for Windows where SQLite can keep files locked briefly.

    Tries to delete ``path`` up to ``retries`` times. Each ``PermissionError``
    is followed by a linearly growing pause (``delay_s``, ``2 * delay_s``, ...)
    before the next try. If every retry fails, one last attempt is made and
    any ``OSError`` it raises is swallowed, so that final attempt never raises.

    Args:
        path: File to remove. A path that does not exist is treated as
            already deleted.
        retries: Maximum number of attempts made inside the retry loop.
        delay_s: Base pause in seconds; multiplied by the 1-based attempt
            number after each ``PermissionError``.
    """
    for attempt_no in range(1, retries + 1):
        try:
            path.unlink()
        except FileNotFoundError:
            # Nothing left to delete.
            return
        except PermissionError:
            # File is still locked: back off a little longer each time.
            time.sleep(delay_s * attempt_no)
        else:
            return

    # Last chance after all retries are exhausted.
    try:
        path.unlink(missing_ok=True)
    except OSError:
        # PermissionError is a subclass of OSError, so this one clause covers
        # both. Swallowing is deliberate: this is best-effort cleanup and a
        # leftover file should not raise from here.
        pass
class TestPureVectorSearch:
"""Tests for pure vector search mode."""
@@ -48,7 +65,7 @@ class TestPureVectorSearch:
store.close()
if db_path.exists():
db_path.unlink()
_safe_unlink(db_path)
def test_pure_vector_without_embeddings(self, sample_db):
"""Test pure_vector mode returns empty when no embeddings exist."""
@@ -200,12 +217,8 @@ def login_handler(credentials: dict) -> bool:
yield db_path
store.close()
# Ignore file deletion errors on Windows (SQLite file lock)
try:
if db_path.exists():
db_path.unlink()
except PermissionError:
pass # Ignore Windows file lock errors
if db_path.exists():
_safe_unlink(db_path)
def test_pure_vector_with_embeddings(self, db_with_embeddings):
"""Test pure vector search returns results when embeddings exist."""
@@ -289,7 +302,7 @@ class TestSearchModeComparison:
store.close()
if db_path.exists():
db_path.unlink()
_safe_unlink(db_path)
def test_mode_comparison_without_embeddings(self, comparison_db):
"""Compare all search modes without embeddings."""