#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Multi-dimensional search benchmark: Compare search methods across multiple queries.

Dimensions:
1. Speed (time_ms)
2. Result Quality (relevance score distribution)
3. Ranking Stability (position changes vs baseline)
4. Coverage (unique files found)
"""

import subprocess
import sys
import os
import re
import json
import time
import io

# Fix Windows console encoding
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')

from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from pathlib import Path

os.chdir(r"D:\dongdiankaifa9\hydro_generator_module")

# Test queries covering different search intents
TEST_QUERIES = [
    ("热网络计算", "Chinese: thermal network calculation"),
    ("ThermalResistance", "Code identifier"),
    ("boundary condition handling", "Natural language"),
    ("stator slot cooling", "Domain-specific"),
    ("def build", "Code pattern"),
]

# Search methods to compare
SEARCH_METHODS = [
    ("hybrid", None, "Hybrid (FTS+Vector RRF)"),
    ("vector", None, "Pure Vector"),
    ("cascade", "binary", "Cascade Binary"),
    ("cascade", "hybrid", "Cascade Hybrid (Cross-Encoder)"),
]

ansi_escape = re.compile(r'\x1b\[[0-9;]*m')


@dataclass
class SearchResult:
    method: str
    strategy: Optional[str]
    query: str
    time_ms: float
    count: int
    top_files: List[str]
    top_scores: List[float]
    success: bool
    error: Optional[str] = None


def run_search(query: str, method: str, strategy: Optional[str] = None,
               limit: int = 10) -> SearchResult:
    """Run a search and return structured result."""
    cmd = [sys.executable, "-m", "codexlens", "search", query,
           "--method", method, "--limit", str(limit), "--json"]
    if strategy and method == "cascade":
        cmd.extend(["--cascade-strategy", strategy])

    start = time.perf_counter()
    result = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8")
    elapsed = (time.perf_counter() - start) * 1000

    # Strip ANSI codes
    output = ansi_escape.sub('', result.stdout + result.stderr)

    # Parse JSON
    start_idx = output.find('{')
    if start_idx < 0:
        return SearchResult(
            method=method, strategy=strategy, query=query, time_ms=elapsed,
            count=0, top_files=[], top_scores=[], success=False,
            error="No JSON found"
        )

    # Parse nested JSON properly
    in_string = False
    escaped = False
    depth = 0
    end_idx = start_idx
    for i, c in enumerate(output[start_idx:]):
        if escaped:
            escaped = False
            continue
        if c == '\\':
            escaped = True
            continue
        if c == '"' and not escaped:
            in_string = not in_string
            continue
        if not in_string:
            if c == '{':
                depth += 1
            elif c == '}':
                depth -= 1
                if depth == 0:
                    end_idx = start_idx + i + 1
                    break

    try:
        data = json.loads(output[start_idx:end_idx])
        if not data.get("success"):
            return SearchResult(
                method=method, strategy=strategy, query=query, time_ms=elapsed,
                count=0, top_files=[], top_scores=[], success=False,
                error=data.get("error", "Unknown error")
            )

        results = data.get("result", {}).get("results", [])[:limit]
        stats = data.get("result", {}).get("stats", {})

        top_files = [os.path.basename(r.get("path", "")) for r in results]
        top_scores = [r.get("score", 0) for r in results]

        return SearchResult(
            method=method, strategy=strategy, query=query,
            time_ms=stats.get("time_ms", elapsed),
            count=len(results), top_files=top_files, top_scores=top_scores,
            success=True
        )
    except Exception as e:
        return SearchResult(
            method=method, strategy=strategy, query=query, time_ms=elapsed,
            count=0, top_files=[], top_scores=[], success=False,
            error=str(e)
        )

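# NOTE: run_search() assumes the CLI's --json output is shaped roughly like the
# structure below. Field names are inferred from the parsing code above, not
# from codexlens documentation:
#   {"success": true,
#    "result": {"results": [{"path": "...", "score": 0.0, ...}, ...],
#               "stats": {"time_ms": 0.0, ...}},
#    "error": "..."}
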
def calculate_ranking_similarity(baseline: List[str], candidate: List[str]) -> float:
    """Position-weighted overlap similarity between two ranked file lists (not true NDCG)."""
    if not baseline or not candidate:
        return 0.0

    # Simple overlap-based similarity with position weighting
    score = 0.0
    for i, file in enumerate(candidate[:10]):
        if file in baseline:
            baseline_pos = baseline.index(file)
            # Weight by position similarity
            pos_diff = abs(i - baseline_pos)
            score += 1.0 / (1 + pos_diff * 0.2)

    return score / min(len(baseline), 10)

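# Worked example (hypothetical file names, values follow the formula above):
#   baseline  = ["a.py", "b.py", "c.py"]
#   candidate = ["b.py", "a.py", "d.py"]
#   "b.py": |0 - 1| = 1 -> 1 / 1.2 ≈ 0.833;  "a.py": |1 - 0| = 1 -> ≈ 0.833;  "d.py": no match
#   similarity = (0.833 + 0.833) / min(3, 10) ≈ 0.556
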
def print_divider(char="=", width=80):
    print(char * width)


def main():
    print_divider()
    print("🔬 CodexLens 搜索方法多维度对比测试")
    print_divider()
    print(f"测试目录: {os.getcwd()}")
    print(f"测试查询数: {len(TEST_QUERIES)}")
    print(f"对比方法数: {len(SEARCH_METHODS)}")
    print_divider()

    all_results: Dict[str, Dict[str, SearchResult]] = {}

    # Run all tests
    for query, query_desc in TEST_QUERIES:
        print(f"\n📝 查询: \"{query}\" ({query_desc})")
        print("-" * 60)
        all_results[query] = {}

        for method, strategy, method_name in SEARCH_METHODS:
            method_key = f"{method}_{strategy}" if strategy else method
            print(f"  ⏳ {method_name}...", end=" ", flush=True)

            result = run_search(query, method, strategy)
            all_results[query][method_key] = result

            if result.success:
                print(f"✓ {result.time_ms:.0f}ms, {result.count} results")
            else:
                print(f"✗ {result.error}")

    # === Analysis ===
    print("\n")
    print_divider()
    print("📊 综合分析报告")
    print_divider()

    # 1. Speed Comparison
    print("\n### 1️⃣ 速度对比 (平均耗时 ms)")
    print("-" * 60)

    method_times: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}
    for query in all_results:
        for method_key, result in all_results[query].items():
            if result.success:
                method_times[method_key].append(result.time_ms)

    speed_ranking = []
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        times = method_times[method_key]
        if times:
            avg_time = sum(times) / len(times)
            min_time = min(times)
            max_time = max(times)
            speed_ranking.append((method_name, avg_time, min_time, max_time))

    speed_ranking.sort(key=lambda x: x[1])

    print(f"{'方法':<35} {'平均':>10} {'最快':>10} {'最慢':>10}")
    print("-" * 65)
    for method_name, avg, min_t, max_t in speed_ranking:
        print(f"{method_name:<35} {avg:>10.0f} {min_t:>10.0f} {max_t:>10.0f}")

    # Speed winner
    if speed_ranking:
        fastest = speed_ranking[0]
        slowest = speed_ranking[-1]
        speedup = slowest[1] / fastest[1] if fastest[1] > 0 else 0
        print(f"\n🏆 最快: {fastest[0]} (比最慢快 {speedup:.1f}x)")

    # 2. Score Distribution
    print("\n### 2️⃣ 相关性得分分布 (Top-10 平均分)")
    print("-" * 60)

    method_scores: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}
    for query in all_results:
        for method_key, result in all_results[query].items():
            if result.success and result.top_scores:
                avg_score = sum(result.top_scores) / len(result.top_scores)
                method_scores[method_key].append(avg_score)

    print(f"{'方法':<35} {'平均分':>12} {'分布范围':>20}")
    print("-" * 67)
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        scores = method_scores[method_key]
        if scores:
            avg_score = sum(scores) / len(scores)
            min_score = min(scores)
            max_score = max(scores)
            print(f"{method_name:<35} {avg_score:>12.4f} {min_score:.4f} - {max_score:.4f}")

    # 3. Ranking Stability (vs Hybrid as baseline)
    print("\n### 3️⃣ 排名稳定性 (与 Hybrid 基线对比)")
    print("-" * 60)

    print(f"{'方法':<35} {'相似度':>12} {'说明':>20}")
    print("-" * 67)
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        if method_key == "hybrid":
            print(f"{method_name:<35} {'1.0000':>12} {'(基线)':>20}")
            continue

        similarities = []
        for query in all_results:
            baseline = all_results[query].get("hybrid")
            candidate = all_results[query].get(method_key)
            if baseline and candidate and baseline.success and candidate.success:
                sim = calculate_ranking_similarity(baseline.top_files, candidate.top_files)
                similarities.append(sim)

        if similarities:
            avg_sim = sum(similarities) / len(similarities)
            diff_level = "高度一致" if avg_sim > 0.7 else "中度差异" if avg_sim > 0.4 else "显著差异"
            print(f"{method_name:<35} {avg_sim:>12.4f} {diff_level:>20}")

    # 4. Detailed Query Comparison
    print("\n### 4️⃣ 各查询详细对比")
    print("-" * 60)

    for query, query_desc in TEST_QUERIES:
        print(f"\n📌 \"{query}\" ({query_desc})")
        print()
        # Show top-3 results for each method
        for method, strategy, method_name in SEARCH_METHODS:
            method_key = f"{method}_{strategy}" if strategy else method
            result = all_results[query].get(method_key)
            if result and result.success:
                print(f"  [{method_name}] {result.time_ms:.0f}ms")
                for i, (file, score) in enumerate(zip(result.top_files[:3], result.top_scores[:3]), 1):
                    print(f"    {i}. {file:<40} {score:.4f}")
            else:
                print(f"  [{method_name}] 失败: {result.error if result else 'N/A'}")
            print()

    # 5. Summary
    print_divider()
    print("📋 总结")
    print_divider()
    print("""
┌─────────────────────────────────────────────────────────────────┐
│ 方法特点总结                                                     │
├─────────────────────────────────────────────────────────────────┤
│ Hybrid (FTS+Vector)   │ 基线方法,综合质量好,速度中等            │
│ Pure Vector           │ 语义理解强,适合自然语言查询              │
│ Cascade Binary        │ 速度最快,适合大代码库快速检索            │
│ Cascade Hybrid        │ Cross-Encoder 精排,质量最高但速度较慢    │
└─────────────────────────────────────────────────────────────────┘

推荐使用场景:
  • 日常搜索: hybrid (默认)
  • 大代码库快速检索: cascade --cascade-strategy binary
  • 追求最高质量: cascade --cascade-strategy hybrid
  • 自然语言查询: vector
""")
    print_divider()


if __name__ == "__main__":
    main()
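
# Coverage (dimension 4 in the module docstring) is never reported by main().
# A minimal sketch of one way to compute it from the all_results structure;
# print_coverage() is a hypothetical helper and is not called anywhere by
# default (move it above main() and call it after section 3 to wire it in).
def print_coverage(all_results: Dict[str, Dict[str, SearchResult]]) -> None:
    """Count distinct files surfaced by each method across all queries."""
    coverage: Dict[str, set] = {}
    for per_query in all_results.values():
        for method_key, result in per_query.items():
            if result.success:
                coverage.setdefault(method_key, set()).update(result.top_files)

    print("\n### 覆盖度 (各方法找到的不同文件数)")
    print("-" * 60)
    for method_key, files in sorted(coverage.items(), key=lambda kv: -len(kv[1])):
        print(f"{method_key:<25} {len(files):>6}")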