#!/usr/bin/env python """Compare search results: Hybrid vs Cascade with Reranker.""" import subprocess import sys import os import re import json os.chdir(r"D:\dongdiankaifa9\hydro_generator_module") query = "热网络计算" ansi_escape = re.compile(r'\x1b\[[0-9;]*m') def run_search(method: str) -> dict: """Run search and return parsed result dict.""" cmd = [sys.executable, "-m", "codexlens", "search", query, "--method", method, "--limit", "10", "--json"] result = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8") # Strip ANSI codes output = ansi_escape.sub('', result.stdout + result.stderr) # Find and parse JSON (properly handle nested structures) start = output.find('{') if start < 0: return {"success": False, "error": "No JSON found"} # Count braces properly, handling strings in_string = False escaped = False depth = 0 end_idx = start for i, c in enumerate(output[start:]): if escaped: escaped = False continue if c == '\\': escaped = True continue if c == '"' and not escaped: in_string = not in_string continue if not in_string: if c == '{': depth += 1 elif c == '}': depth -= 1 if depth == 0: end_idx = start + i + 1 break try: return json.loads(output[start:end_idx]) except Exception as e: return {"success": False, "error": str(e)} print("=" * 75) print(f"搜索对比: Hybrid vs Cascade") print(f"查询: {query}") print("=" * 75) # Hybrid search (no cross-encoder reranking) print("\n[1] Hybrid 搜索 (无 Cross-Encoder Reranker):") print("-" * 75) hybrid_result = run_search("hybrid") hybrid_files = [] if hybrid_result.get("success"): results = hybrid_result.get("result", {}).get("results", [])[:10] for i, r in enumerate(results, 1): name = os.path.basename(r.get("path", "")) score = r.get("score", 0) hybrid_files.append(name) print(f"{i:2}. {name:<45} score={score:.4f}") else: print("搜索失败:", hybrid_result.get("error")) # Cascade search (with cross-encoder reranking when strategy=hybrid) print("\n[2] Cascade 搜索 (使用 Cross-Encoder Reranker):") print("-" * 75) cascade_result = run_search("cascade") cascade_files = [] if cascade_result.get("success"): results = cascade_result.get("result", {}).get("results", [])[:10] for i, r in enumerate(results, 1): name = os.path.basename(r.get("path", "")) score = r.get("score", 0) cascade_files.append(name) print(f"{i:2}. {name:<45} score={score:.4f}") else: print("搜索失败:", cascade_result.get("error")) # Compare ranking changes print("\n[3] 排名变化分析:") print("-" * 75) changes = [] for i, name in enumerate(cascade_files): if name in hybrid_files: old_pos = hybrid_files.index(name) + 1 new_pos = i + 1 if old_pos != new_pos: direction = "↑" if new_pos < old_pos else "↓" changes.append(f" {name}: #{old_pos} → #{new_pos} {direction}") else: changes.append(f" {name}: NEW (不在 Hybrid 前10)") if changes: print("Reranker 排序变化:") for c in changes: print(c) else: print("排序相同 (无变化)") print("\n" + "=" * 75) print("配置说明:") print("- Hybrid: FTS + Vector 融合 (无二次精排)") print("- Cascade: 粗筛 + Cross-Encoder Reranker 精排") print("- Reranker: Qwen/Qwen3-Reranker-8B via SiliconFlow API") print("=" * 75)