Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-05 01:50:27 +08:00
Add comprehensive tests for query parsing and Reciprocal Rank Fusion
- Implemented tests for the QueryParser class, covering various identifier splitting methods (CamelCase, snake_case, kebab-case), OR expansion, and FTS5 operator preservation.
- Added parameterized tests to validate expected token outputs for different query formats.
- Created edge case tests to ensure robustness against unusual input scenarios.
- Developed tests for the Reciprocal Rank Fusion (RRF) algorithm, including score computation, weight handling, and result ranking across multiple sources (a scoring sketch follows below).
- Included tests for normalization of BM25 scores and tagging search results with source metadata.
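For reference, Reciprocal Rank Fusion scores each document by summing a weighted 1/(k + rank) contribution from every source that returned it, then re-ranks by the fused score. Below is a minimal sketch of that computation, assuming hypothetical source names and the conventional k = 60; the function and parameter names are illustrative, not the project's actual QueryParser/RRF API.

from collections import defaultdict

def rrf_fuse(rankings, weights=None, k=60):
    """Fuse ranked doc-id lists from several sources into one ranking.

    rankings: dict of source name -> list of doc ids, best first.
    weights:  optional dict of source name -> weight (defaults to 1.0).
    """
    weights = weights or {}
    scores = defaultdict(float)
    for source, ranked_ids in rankings.items():
        w = weights.get(source, 1.0)
        for rank, doc_id in enumerate(ranked_ids, start=1):
            scores[doc_id] += w / (k + rank)  # reciprocal-rank contribution
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

# Example: a doc returned by both sources ("b.py") outranks single-source hits.
fused = rrf_fuse({"fts": ["a.py", "b.py"], "vector": ["b.py", "c.py"]},
                 weights={"fts": 1.0, "vector": 0.8})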
codex-lens/tests/fix_sql.py  (new file, 84 lines added)
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""Fix SQL statements in test files to match new schema."""
import re
from pathlib import Path

def fix_insert_statement(line):
    """Fix INSERT statements to provide both name and full_path."""
    # Match pattern: (test_path, test_content, "python")
    # or ("test/file1.py", "content1", "python")
    pattern = r'\(([^,]+),\s*([^,]+),\s*([^)]+)\)'

    def replace_values(match):
        path_var, content_var, lang_var = match.groups()
        # If it's a variable, we need to extract name from it
        # For now, use path_var for both name and full_path
        return f'({path_var}.split("/")[-1] if "/" in {path_var} else {path_var}, {path_var}, {content_var}, {lang_var}, 1234567890.0)'

    # Check if this is an INSERT VALUES line
    if 'INSERT INTO files' in line and 'VALUES' in line:
        # Simple string values like ("test/file1.py", "content1", "python")
        if re.search(r'\("[^"]+",\s*"[^"]+",\s*"[^"]+"\)', line):
            def replace_str_values(match):
                parts = match.group(0)[1:-1].split('", "')
                if len(parts) == 3:
                    path = parts[0].strip('"')
                    content = parts[1]
                    lang = parts[2].strip('"')
                    name = path.split('/')[-1]
                    return f'("{name}", "{path}", "{content}", "{lang}", 1234567890.0)'
                return match.group(0)

            line = re.sub(r'\("[^"]+",\s*"[^"]+",\s*"[^"]+"\)', replace_str_values, line)

    return line

def main():
    test_files = [
        Path("test_dual_fts.py"),
        Path("test_incremental_indexing.py"),
        Path("test_hybrid_search_e2e.py")
    ]

    for test_file in test_files:
        if not test_file.exists():
            continue

        lines = test_file.read_text(encoding='utf-8').splitlines(keepends=True)

        # Fix tuple values in execute calls
        new_lines = []
        i = 0
        while i < len(lines):
            line = lines[i]

            # Check if this is an execute with VALUES and tuple on next line
            if 'conn.execute(' in line or 'conn.executemany(' in line:
                # Look ahead for VALUES pattern
                if i + 2 < len(lines) and 'VALUES' in lines[i+1]:
                    # Check for tuple pattern on line after VALUES
                    if i + 2 < len(lines) and re.search(r'^\s*\([^)]+\)\s*$', lines[i+2]):
                        tuple_line = lines[i+2]
                        # Extract values: (test_path, test_content, "python")
                        match = re.search(r'\(([^,]+),\s*([^,]+),\s*"([^"]+)"\)', tuple_line)
                        if match:
                            var1, var2, var3 = match.groups()
                            var1 = var1.strip()
                            var2 = var2.strip()
                            # Create new tuple with name extraction
                            indent = re.match(r'^(\s*)', tuple_line).group(1)
                            new_tuple = f'{indent}({var1}.split("/")[-1], {var1}, {var2}, "{var3}", 1234567890.0)\n'
                            new_lines.append(line)
                            new_lines.append(lines[i+1])
                            new_lines.append(new_tuple)
                            i += 3
                            continue

            new_lines.append(line)
            i += 1

        test_file.write_text(''.join(new_lines), encoding='utf-8')
        print(f"Fixed {test_file}")

if __name__ == "__main__":
    main()
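As an illustration of the rewrite this script performs, here is a hedged example; the resulting column order (name, full_path, content, language, mtime) is inferred from the tuples the script emits, and importing the helper as a module is an assumption.

from fix_sql import fix_insert_statement  # assumption: the script is importable from the tests directory

before = 'conn.execute("INSERT INTO files VALUES (?, ?, ?)", ("test/file1.py", "content1", "python"))'
after = fix_insert_statement(before)
# Only the literal value tuple is rewritten by this helper:
# after == 'conn.execute("INSERT INTO files VALUES (?, ?, ?)", '
#          '("file1.py", "test/file1.py", "content1", "python", 1234567890.0))'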