mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
feat: Add code analysis and LLM action templates with detailed configurations and examples
- Introduced a comprehensive code analysis action template for integrating code exploration and analysis capabilities. - Added LLM action template for seamless integration of LLM calls with customizable prompts and tools. - Implemented a benchmark search script to compare multiple search methods across various dimensions including speed, result quality, ranking stability, and coverage. - Provided preset configurations for common analysis tasks and LLM actions, enhancing usability and flexibility.
This commit is contained in:
330
benchmark_search.py
Normal file
330
benchmark_search.py
Normal file
@@ -0,0 +1,330 @@
|
||||
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Multi-dimensional search benchmark: Compare search methods across multiple queries.

Dimensions:
1. Speed (time_ms)
2. Result Quality (relevance score distribution)
3. Ranking Stability (position changes vs baseline)
4. Coverage (unique files found)
"""
import subprocess
import sys
import os
import re
import json
import time
import io

# Fix Windows console encoding: force UTF-8 with replacement characters so the
# emoji/Chinese report output never raises UnicodeEncodeError on a GBK console.
# Prefer TextIOBase.reconfigure() (3.7+), which adjusts the existing stream in
# place; fall back to re-wrapping the raw buffer only for streams lacking it.
# The original code re-wrapped unconditionally, which crashes when a stream has
# no .buffer (e.g. already redirected/wrapped).
for _name in ("stdout", "stderr"):
    _stream = getattr(sys, _name)
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8", errors="replace")
    elif hasattr(_stream, "buffer"):
        setattr(sys, _name,
                io.TextIOWrapper(_stream.buffer, encoding="utf-8", errors="replace"))

from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from pathlib import Path

# Project directory the benchmark runs against.  Overridable through the
# BENCHMARK_DIR environment variable so the script is not tied to one machine;
# the default preserves the original hard-coded path.
os.chdir(os.environ.get("BENCHMARK_DIR", r"D:\dongdiankaifa9\hydro_generator_module"))
|
||||
|
||||
# Test queries covering different search intents.
# Each entry is (query_string, human-readable description of the intent).
TEST_QUERIES = [
    ("热网络计算", "Chinese: thermal network calculation"),
    ("ThermalResistance", "Code identifier"),
    ("boundary condition handling", "Natural language"),
    ("stator slot cooling", "Domain-specific"),
    ("def build", "Code pattern"),
]

# Search methods to compare.
# Each entry is (cli_method, cascade_strategy_or_None, display_name);
# the strategy is only forwarded on the command line when cli_method == "cascade".
SEARCH_METHODS = [
    ("hybrid", None, "Hybrid (FTS+Vector RRF)"),
    ("vector", None, "Pure Vector"),
    ("cascade", "binary", "Cascade Binary"),
    ("cascade", "hybrid", "Cascade Hybrid (Cross-Encoder)"),
]

# Pre-compiled pattern matching ANSI SGR (color) escape sequences; used to
# clean CLI output before locating the embedded JSON payload.
ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
|
||||
|
||||
|
||||
@dataclass
class SearchResult:
    """Structured outcome of one search CLI invocation, comparable across methods."""

    method: str                  # search method name passed on the CLI
    strategy: Optional[str]      # cascade strategy, or None for non-cascade methods
    query: str                   # the query string that was searched
    time_ms: float               # engine-reported time if available, else measured wall time
    count: int                   # number of results kept (<= requested limit)
    top_files: List[str]         # basenames of the top result files, in rank order
    top_scores: List[float]      # relevance scores aligned index-for-index with top_files
    success: bool                # False when the CLI failed or its output was unparsable
    error: Optional[str] = None  # human-readable failure description when success is False
|
||||
|
||||
|
||||
def run_search(query: str, method: str, strategy: Optional[str] = None, limit: int = 10) -> SearchResult:
    """Run one ``codexlens search`` CLI invocation and return a structured result.

    Args:
        query: Search string to pass to the CLI.
        method: Search method name ("hybrid", "vector", "cascade", ...).
        strategy: Cascade strategy; only forwarded when ``method == "cascade"``.
        limit: Maximum number of results to request and keep.

    Returns:
        A SearchResult.  On any failure (no JSON in the output, CLI-reported
        error, parse error) ``success`` is False and ``error`` says why; this
        function never raises.
    """
    cmd = [sys.executable, "-m", "codexlens", "search", query,
           "--method", method, "--limit", str(limit), "--json"]

    if strategy and method == "cascade":
        cmd.extend(["--cascade-strategy", strategy])

    start = time.perf_counter()
    result = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8")
    elapsed = (time.perf_counter() - start) * 1000

    # Strip ANSI color codes so the JSON payload can be located reliably.
    output = ansi_escape.sub('', result.stdout + result.stderr)

    # Locate the start of the JSON object embedded in the CLI output.
    start_idx = output.find('{')
    if start_idx < 0:
        return SearchResult(
            method=method, strategy=strategy, query=query,
            time_ms=elapsed, count=0, top_files=[], top_scores=[],
            success=False, error="No JSON found"
        )

    try:
        # raw_decode() parses exactly one JSON value starting at start_idx and
        # ignores any trailing text.  This replaces a hand-rolled brace matcher
        # whose escape tracking was broken: its `and not escaped` check was
        # dead code (the escaped branch `continue`d first), and a backslash
        # outside a string literal flipped the escape state, corrupting the
        # in-string bookkeeping.
        data, _end = json.JSONDecoder().raw_decode(output, start_idx)

        if not data.get("success"):
            return SearchResult(
                method=method, strategy=strategy, query=query,
                time_ms=elapsed, count=0, top_files=[], top_scores=[],
                success=False, error=data.get("error", "Unknown error")
            )

        results = data.get("result", {}).get("results", [])[:limit]
        stats = data.get("result", {}).get("stats", {})

        top_files = [os.path.basename(r.get("path", "")) for r in results]
        top_scores = [r.get("score", 0) for r in results]

        return SearchResult(
            method=method, strategy=strategy, query=query,
            # Prefer the engine's self-reported timing; fall back to wall time.
            time_ms=stats.get("time_ms", elapsed),
            count=len(results),
            top_files=top_files,
            top_scores=top_scores,
            success=True
        )
    except Exception as e:
        return SearchResult(
            method=method, strategy=strategy, query=query,
            time_ms=elapsed, count=0, top_files=[], top_scores=[],
            success=False, error=str(e)
        )
|
||||
|
||||
|
||||
def calculate_ranking_similarity(baseline: List[str], candidate: List[str]) -> float:
    """Score how closely *candidate*'s ranking matches *baseline*'s.

    This is an overlap-based measure with position weighting — NOT a true
    normalized DCG, despite what an earlier docstring claimed.  Each of
    candidate's top-10 entries that also appears in baseline contributes
    ``1 / (1 + 0.2 * |position delta|)``, and the sum is normalized by
    ``min(len(baseline), 10)``.

    Args:
        baseline: Ranked file names from the reference method.
        candidate: Ranked file names from the method under comparison.

    Returns:
        A similarity in [0, 1]; identical top-10 rankings score 1.0, and
        either list being empty scores 0.0.
    """
    if not baseline or not candidate:
        return 0.0

    score = 0.0
    # 'name' instead of the original 'file' (shadows a Python 2 builtin).
    for pos, name in enumerate(candidate[:10]):
        if name in baseline:
            baseline_pos = baseline.index(name)
            # Weight by position similarity: same slot counts fully, and each
            # slot of displacement discounts the contribution by a factor of
            # 1 / (1 + 0.2 * displacement).
            pos_diff = abs(pos - baseline_pos)
            score += 1.0 / (1 + pos_diff * 0.2)

    return score / min(len(baseline), 10)
|
||||
|
||||
|
||||
def print_divider(char="=", width=80):
    """Print a horizontal rule made of *width* repetitions of *char*."""
    rule = char * width
    print(rule)
|
||||
|
||||
|
||||
def main():
    """Run every (query, method) combination, then print a multi-section
    comparison report: speed, score distribution, ranking stability vs the
    hybrid baseline, per-query detail, and a summary table."""
    print_divider()
    print("🔬 CodexLens 搜索方法多维度对比测试")
    print_divider()
    print(f"测试目录: {os.getcwd()}")
    print(f"测试查询数: {len(TEST_QUERIES)}")
    print(f"对比方法数: {len(SEARCH_METHODS)}")
    print_divider()

    # query -> method_key -> SearchResult, filled by the run phase below and
    # consumed by every analysis section.
    all_results: Dict[str, Dict[str, SearchResult]] = {}

    # Run all tests
    for query, query_desc in TEST_QUERIES:
        print(f"\n📝 查询: \"{query}\" ({query_desc})")
        print("-" * 60)

        all_results[query] = {}

        for method, strategy, method_name in SEARCH_METHODS:
            # method_key distinguishes the two cascade variants ("cascade_binary"
            # vs "cascade_hybrid"); plain methods use their name as-is.
            method_key = f"{method}_{strategy}" if strategy else method
            print(f" ⏳ {method_name}...", end=" ", flush=True)

            result = run_search(query, method, strategy)
            all_results[query][method_key] = result

            if result.success:
                print(f"✓ {result.time_ms:.0f}ms, {result.count} results")
            else:
                print(f"✗ {result.error}")

    # === Analysis ===
    print("\n")
    print_divider()
    print("📊 综合分析报告")
    print_divider()

    # 1. Speed Comparison: average/min/max time per method, sorted fastest first.
    print("\n### 1️⃣ 速度对比 (平均耗时 ms)")
    print("-" * 60)

    method_times: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}

    # Only successful runs contribute timings.
    for query in all_results:
        for method_key, result in all_results[query].items():
            if result.success:
                method_times[method_key].append(result.time_ms)

    speed_ranking = []
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        times = method_times[method_key]
        if times:
            avg_time = sum(times) / len(times)
            min_time = min(times)
            max_time = max(times)
            speed_ranking.append((method_name, avg_time, min_time, max_time))

    # Sort by average time, ascending (fastest first).
    speed_ranking.sort(key=lambda x: x[1])

    print(f"{'方法':<35} {'平均':>10} {'最快':>10} {'最慢':>10}")
    print("-" * 65)
    for method_name, avg, min_t, max_t in speed_ranking:
        print(f"{method_name:<35} {avg:>10.0f} {min_t:>10.0f} {max_t:>10.0f}")

    # Speed winner: report the fastest method and its speedup over the slowest.
    if speed_ranking:
        fastest = speed_ranking[0]
        slowest = speed_ranking[-1]
        speedup = slowest[1] / fastest[1] if fastest[1] > 0 else 0
        print(f"\n🏆 最快: {fastest[0]} (比最慢快 {speedup:.1f}x)")

    # 2. Score Distribution: per-method average of each query's mean top-10 score.
    print("\n### 2️⃣ 相关性得分分布 (Top-10 平均分)")
    print("-" * 60)

    method_scores: Dict[str, List[float]] = {f"{m}_{s}" if s else m: [] for m, s, _ in SEARCH_METHODS}

    for query in all_results:
        for method_key, result in all_results[query].items():
            if result.success and result.top_scores:
                avg_score = sum(result.top_scores) / len(result.top_scores)
                method_scores[method_key].append(avg_score)

    print(f"{'方法':<35} {'平均分':>12} {'分布范围':>20}")
    print("-" * 67)
    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        scores = method_scores[method_key]
        if scores:
            avg_score = sum(scores) / len(scores)
            min_score = min(scores)
            max_score = max(scores)
            print(f"{method_name:<35} {avg_score:>12.4f} {min_score:.4f} - {max_score:.4f}")

    # 3. Ranking Stability (vs Hybrid as baseline): average ranking similarity
    # of each non-baseline method against "hybrid", per query.
    print("\n### 3️⃣ 排名稳定性 (与 Hybrid 基线对比)")
    print("-" * 60)

    print(f"{'方法':<35} {'相似度':>12} {'说明':>20}")
    print("-" * 67)

    for method, strategy, method_name in SEARCH_METHODS:
        method_key = f"{method}_{strategy}" if strategy else method
        if method_key == "hybrid":
            # The baseline is trivially identical to itself.
            print(f"{method_name:<35} {'1.0000':>12} {'(基线)':>20}")
            continue

        similarities = []
        for query in all_results:
            baseline = all_results[query].get("hybrid")
            candidate = all_results[query].get(method_key)
            # Only compare when both runs succeeded for this query.
            if baseline and candidate and baseline.success and candidate.success:
                sim = calculate_ranking_similarity(baseline.top_files, candidate.top_files)
                similarities.append(sim)

        if similarities:
            avg_sim = sum(similarities) / len(similarities)
            # Bucket the similarity into a coarse human-readable label.
            diff_level = "高度一致" if avg_sim > 0.7 else "中度差异" if avg_sim > 0.4 else "显著差异"
            print(f"{method_name:<35} {avg_sim:>12.4f} {diff_level:>20}")

    # 4. Detailed Query Comparison: top-3 files and scores per method per query.
    print("\n### 4️⃣ 各查询详细对比")
    print("-" * 60)

    for query, query_desc in TEST_QUERIES:
        print(f"\n📌 \"{query}\" ({query_desc})")
        print()

        # Show top-3 results for each method
        for method, strategy, method_name in SEARCH_METHODS:
            method_key = f"{method}_{strategy}" if strategy else method
            result = all_results[query].get(method_key)

            if result and result.success:
                print(f" [{method_name}] {result.time_ms:.0f}ms")
                for i, (file, score) in enumerate(zip(result.top_files[:3], result.top_scores[:3]), 1):
                    print(f" {i}. {file:<40} {score:.4f}")
            else:
                print(f" [{method_name}] 失败: {result.error if result else 'N/A'}")
            print()

    # 5. Summary: static characterization of the methods and usage advice.
    print_divider()
    print("📋 总结")
    print_divider()

    print("""
┌─────────────────────────────────────────────────────────────────────┐
│ 方法特点总结 │
├─────────────────────────────────────────────────────────────────────┤
│ Hybrid (FTS+Vector) │ 基线方法,综合质量好,速度中等 │
│ Pure Vector │ 语义理解强,适合自然语言查询 │
│ Cascade Binary │ 速度最快,适合大代码库快速检索 │
│ Cascade Hybrid │ Cross-Encoder 精排,质量最高但速度较慢 │
└─────────────────────────────────────────────────────────────────────┘

推荐使用场景:
• 日常搜索: hybrid (默认)
• 大代码库快速检索: cascade --cascade-strategy binary
• 追求最高质量: cascade --cascade-strategy hybrid
• 自然语言查询: vector
""")

    print_divider()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user