Claude-Code-Workflow/archive/benchmark_search.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Multi-dimensional search benchmark: Compare search methods across multiple queries.
Dimensions:
1. Speed (time_ms)
2. Result Quality (relevance score distribution)
3. Ranking Stability (position changes vs baseline)
4. Coverage (unique files found)
"""
import subprocess
import sys
import os
import re
import json
import time
import io
# Fix Windows console encoding
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from pathlib import Path
os.chdir(r"D:\dongdiankaifa9\hydro_generator_module")
# Test queries covering different search intents
TEST_QUERIES = [
    ("热网络计算", "Chinese: thermal network calculation"),
    ("ThermalResistance", "Code identifier"),
    ("boundary condition handling", "Natural language"),
    ("stator slot cooling", "Domain-specific"),
    ("def build", "Code pattern"),
]
# Search methods to compare
SEARCH_METHODS = [
    ("hybrid", None, "Hybrid (FTS+Vector RRF)"),
    ("vector", None, "Pure Vector"),
    ("cascade", "binary", "Cascade Binary"),
    ("cascade", "hybrid", "Cascade Hybrid (Cross-Encoder)"),
]
ansi_escape = re.compile(r'\x1b\[[0-9;]*m')

@dataclass
class SearchResult:
    method: str
    strategy: Optional[str]
    query: str
    time_ms: float
    count: int
    top_files: List[str]
    top_scores: List[float]
    success: bool
    error: Optional[str] = None

def run_search(query: str, method: str, strategy: Optional[str] = None, limit: int = 10) -> SearchResult:
    """Run a single search via the codexlens CLI and return a structured result."""
    cmd = [sys.executable, "-m", "codexlens", "search", query,
           "--method", method, "--limit", str(limit), "--json"]
    if strategy and method == "cascade":
        cmd.extend(["--cascade-strategy", strategy])

    start = time.perf_counter()
    result = subprocess.run(cmd, capture_output=True, text=True,
                            encoding="utf-8", errors="replace")
    elapsed = (time.perf_counter() - start) * 1000

    # Strip ANSI color codes so they cannot corrupt the JSON payload
    output = ansi_escape.sub('', result.stdout + result.stderr)

    # Locate the first JSON object in the combined output
    start_idx = output.find('{')
    if start_idx < 0:
        return SearchResult(
            method=method, strategy=strategy, query=query,
            time_ms=elapsed, count=0, top_files=[], top_scores=[],
            success=False, error="No JSON found"
        )

    # Find the matching closing brace: track brace depth, ignoring braces
    # that occur inside JSON string values
    in_string = False
    escaped = False
    depth = 0
    end_idx = start_idx
    for i, c in enumerate(output[start_idx:]):
        if escaped:
            escaped = False
            continue
        if c == '\\':
            escaped = True
            continue
        if c == '"':
            in_string = not in_string
            continue
        if not in_string:
            if c == '{':
                depth += 1
            elif c == '}':
                depth -= 1
                if depth == 0:
                    end_idx = start_idx + i + 1
                    break

    try:
        data = json.loads(output[start_idx:end_idx])
        if not data.get("success"):
            return SearchResult(
                method=method, strategy=strategy, query=query,
                time_ms=elapsed, count=0, top_files=[], top_scores=[],
                success=False, error=data.get("error", "Unknown error")
            )
        results = data.get("result", {}).get("results", [])[:limit]
        stats = data.get("result", {}).get("stats", {})
        top_files = [os.path.basename(r.get("path", "")) for r in results]
        top_scores = [r.get("score", 0) for r in results]
        return SearchResult(
            method=method, strategy=strategy, query=query,
            time_ms=stats.get("time_ms", elapsed),
            count=len(results),
            top_files=top_files,
            top_scores=top_scores,
            success=True
        )
    except Exception as e:
        return SearchResult(
            method=method, strategy=strategy, query=query,
            time_ms=elapsed, count=0, top_files=[], top_scores=[],
            success=False, error=str(e)
        )
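
# Example (illustrative only): probe a single method/query pair before a full run.
#   r = run_search("ThermalResistance", "cascade", strategy="binary", limit=5)
#   if r.success:
#       print(r.time_ms, r.count, r.top_files)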

def calculate_ranking_similarity(baseline: List[str], candidate: List[str]) -> float:
    """Return a position-weighted overlap similarity between two rankings (0.0-1.0)."""
    if not baseline or not candidate:
        return 0.0
    # Each shared file contributes more when its position in the candidate
    # list is close to its position in the baseline list
    score = 0.0
    for i, file in enumerate(candidate[:10]):
        if file in baseline:
            baseline_pos = baseline.index(file)
            # Weight by position similarity
            pos_diff = abs(i - baseline_pos)
            score += 1.0 / (1 + pos_diff * 0.2)
    return score / min(len(baseline), 10)
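
# Worked example (hypothetical file names): with baseline ["a.py", "b.py", "c.py"]
# and candidate ["b.py", "a.py", "d.py"], the two shared files are each displaced
# by one position and contribute 1 / (1 + 0.2) ≈ 0.833; "d.py" contributes nothing.
# The similarity is therefore (0.833 + 0.833) / min(3, 10) ≈ 0.56.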

def print_divider(char="=", width=80):
    print(char * width)

def main():
    print_divider()
    print("🔬 CodexLens search methods: multi-dimensional comparison")
    print_divider()
    print(f"Test directory: {os.getcwd()}")
    print(f"Test queries: {len(TEST_QUERIES)}")
    print(f"Methods compared: {len(SEARCH_METHODS)}")
    print_divider()

    all_results: Dict[str, Dict[str, SearchResult]] = {}

    # Run all tests
    for query, query_desc in TEST_QUERIES:
        print(f"\n📝 Query: \"{query}\" ({query_desc})")
        print("-" * 60)
        all_results[query] = {}
        for method, strategy, method_name in SEARCH_METHODS:
            method_key = f"{method}_{strategy}" if strategy else method
            print(f"{method_name}...", end=" ", flush=True)
            result = run_search(query, method, strategy)
            all_results[query][method_key] = result
            if result.success:
                print(f"{result.time_ms:.0f}ms, {result.count} results")
            else:
                print(f"{result.error}")

    # === Analysis ===
    print("\n")
    print_divider()
    print("📊 Analysis report")
    print_divider()

    # 1. Speed comparison
    print("\n### 1. Speed comparison (average time in ms)")
    print("-" * 60)
    method_times: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}
    for query in all_results:
        for method_key, result in all_results[query].items():
            if result.success:
                method_times[method_key].append(result.time_ms)
    speed_ranking = []
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        times = method_times[method_key]
        if times:
            avg_time = sum(times) / len(times)
            min_time = min(times)
            max_time = max(times)
            speed_ranking.append((method_name, avg_time, min_time, max_time))
    speed_ranking.sort(key=lambda x: x[1])
    print(f"{'Method':<35} {'Avg':>10} {'Min':>10} {'Max':>10}")
    print("-" * 65)
    for method_name, avg, min_t, max_t in speed_ranking:
        print(f"{method_name:<35} {avg:>10.0f} {min_t:>10.0f} {max_t:>10.0f}")
    # Speed winner
    if speed_ranking:
        fastest = speed_ranking[0]
        slowest = speed_ranking[-1]
        speedup = slowest[1] / fastest[1] if fastest[1] > 0 else 0
        print(f"\n🏆 Fastest: {fastest[0]} ({speedup:.1f}x faster than the slowest)")

    # 2. Score distribution
    print("\n### 2. Relevance score distribution (top-10 average)")
    print("-" * 60)
    method_scores: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}
    for query in all_results:
        for method_key, result in all_results[query].items():
            if result.success and result.top_scores:
                avg_score = sum(result.top_scores) / len(result.top_scores)
                method_scores[method_key].append(avg_score)
    print(f"{'Method':<35} {'Avg score':>12} {'Range':>20}")
    print("-" * 67)
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        scores = method_scores[method_key]
        if scores:
            avg_score = sum(scores) / len(scores)
            min_score = min(scores)
            max_score = max(scores)
            print(f"{method_name:<35} {avg_score:>12.4f} {min_score:.4f} - {max_score:.4f}")

    # 3. Ranking stability (hybrid as the baseline)
    print("\n### 3. Ranking stability (vs. the hybrid baseline)")
    print("-" * 60)
    print(f"{'Method':<35} {'Similarity':>12} {'Assessment':>20}")
    print("-" * 67)
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        if method_key == "hybrid":
            print(f"{method_name:<35} {'1.0000':>12} {'(baseline)':>20}")
            continue
        similarities = []
        for query in all_results:
            baseline = all_results[query].get("hybrid")
            candidate = all_results[query].get(method_key)
            if baseline and candidate and baseline.success and candidate.success:
                sim = calculate_ranking_similarity(baseline.top_files, candidate.top_files)
                similarities.append(sim)
        if similarities:
            avg_sim = sum(similarities) / len(similarities)
            diff_level = "highly consistent" if avg_sim > 0.7 else "moderately different" if avg_sim > 0.4 else "markedly different"
            print(f"{method_name:<35} {avg_sim:>12.4f} {diff_level:>20}")

    # 4. Detailed per-query comparison
    print("\n### 4. Per-query comparison")
    print("-" * 60)
    for query, query_desc in TEST_QUERIES:
        print(f"\n📌 \"{query}\" ({query_desc})")
        print()
        # Show top-3 results for each method
        for method, strategy, method_name in SEARCH_METHODS:
            method_key = f"{method}_{strategy}" if strategy else method
            result = all_results[query].get(method_key)
            if result and result.success:
                print(f"  [{method_name}] {result.time_ms:.0f}ms")
                for i, (file, score) in enumerate(zip(result.top_files[:3], result.top_scores[:3]), 1):
                    print(f"    {i}. {file:<40} {score:.4f}")
            else:
                print(f"  [{method_name}] failed: {result.error if result else 'N/A'}")
            print()

    # 5. Summary
    print_divider()
    print("📋 Summary")
    print_divider()
    print("""
┌──────────────────────┬──────────────────────────────────────────────┐
│ Method               │ Characteristics                              │
├──────────────────────┼──────────────────────────────────────────────┤
│ Hybrid (FTS+Vector)  │ Baseline; good overall quality, medium speed │
│ Pure Vector          │ Strong semantics; best for natural language  │
│ Cascade Binary       │ Fastest; good for quick scans of large repos │
│ Cascade Hybrid       │ Cross-Encoder rerank; best quality, slowest  │
└──────────────────────┴──────────────────────────────────────────────┘
Recommended usage:
  • Everyday search:             hybrid (default)
  • Fast lookup in large repos:  cascade --cascade-strategy binary
  • Highest result quality:      cascade --cascade-strategy hybrid
  • Natural-language queries:    vector
""")
    print_divider()
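
# For reference, the recommended scenarios above correspond to these method /
# strategy combinations as passed to the codexlens CLI by run_search():
#   --method hybrid
#   --method vector
#   --method cascade --cascade-strategy binary
#   --method cascade --cascade-strategy hybrid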

if __name__ == "__main__":
    main()