mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-11 02:33:51 +08:00
Add scripts for inspecting LLM summaries and testing misleading comments
- Implement `inspect_llm_summaries.py` to display LLM-generated summaries from the `semantic_chunks` table in the database.
- Create `show_llm_analysis.py` to demonstrate LLM analysis of misleading code examples, highlighting discrepancies between comments and actual functionality.
- Develop `test_misleading_comments.py` to compare pure vector search with LLM-enhanced search, focusing on the impact of misleading or missing comments on search results.
- Introduce `test_llm_enhanced_search.py` to provide a test suite for evaluating the effectiveness of LLM-enhanced vector search against pure vector search.
- Ensure all new scripts integrate with the existing codebase and follow the established coding standards.
This commit is contained in:
112
codex-lens/scripts/show_llm_analysis.py
Normal file
112
codex-lens/scripts/show_llm_analysis.py
Normal file
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
"""Directly show LLM analysis of test code.

Feeds a deliberately mislabeled sample (a psycopg2 connection pool whose
docstrings claim it is an email sender) to the LLM enhancer, then checks
whether the generated summary/keywords describe the actual behavior
(database/pool terms) or parrot the misleading comments (email/SMTP terms).

Exits with status 1 if the CCW CLI backing the enhancer is unavailable.
"""

import sys
import tempfile
from pathlib import Path

from codexlens.semantic.llm_enhancer import LLMEnhancer, LLMConfig, FileData

# Misleading code example: every docstring says "email", but the code
# actually builds and hands out PostgreSQL pooled connections.
TEST_CODE = '''"""Email sending service."""
import psycopg2
from psycopg2 import pool
from contextlib import contextmanager


class EmailSender:
    """SMTP email sender with retry logic."""

    def __init__(self, min_conn: int = 1, max_conn: int = 10):
        """Initialize email sender."""
        self.pool = psycopg2.pool.SimpleConnectionPool(
            min_conn, max_conn,
            user='dbuser', host='localhost', database='myapp'
        )

    @contextmanager
    def send_email(self):
        """Send email message."""
        conn = self.pool.getconn()
        try:
            yield conn
            conn.commit()
        finally:
            self.pool.putconn(conn)
'''


def _show_test_code() -> None:
    """Print the sample code plus a human-written account of the mismatch."""
    print("=" * 80)
    print("LLM ANALYSIS OF MISLEADING CODE")
    print("=" * 80)

    print("\n[Original Code with Misleading Comments]")
    print("-" * 80)
    print(TEST_CODE)
    print("-" * 80)

    print("\n[Actual Functionality]")
    print(" - Imports: psycopg2 (PostgreSQL library)")
    print(" - Class: EmailSender (but name is misleading!)")
    print(" - Actually: Creates PostgreSQL connection pool")
    print(" - Methods: send_email (actually gets DB connection)")

    print("\n[Misleading Documentation]")
    print(" - Module docstring: 'Email sending service' (WRONG)")
    print(" - Class docstring: 'SMTP email sender' (WRONG)")
    print(" - Method docstring: 'Send email message' (WRONG)")


def _count_hits(terms, summary_lower, keywords_lower) -> int:
    """Count how many of *terms* appear in the summary or any keyword.

    Matching is substring-based on already-lowercased text, mirroring the
    original inline expression that was duplicated for both term lists.
    """
    return sum(
        1 for term in terms
        if term in summary_lower or any(term in k for k in keywords_lower)
    )


def _report_analysis(metadata) -> None:
    """Print the LLM summary and score it against correct vs. misleading terms.

    *metadata* is the enhancer's per-file result; only its ``summary``,
    ``purpose`` and ``keywords`` attributes are read here.
    """
    print("\n[LLM-Generated Summary]")
    print("-" * 80)
    print(f"Summary: {metadata.summary}")
    print(f"\nPurpose: {metadata.purpose}")
    print(f"\nKeywords: {', '.join(metadata.keywords)}")
    print("-" * 80)

    print("\n[Analysis]")
    # Check if LLM identified the real functionality
    summary_lower = metadata.summary.lower()
    keywords_lower = [k.lower() for k in metadata.keywords]

    correct_terms = ['database', 'postgresql', 'connection', 'pool', 'psycopg']
    misleading_terms = ['email', 'smtp', 'send']

    found_correct = _count_hits(correct_terms, summary_lower, keywords_lower)
    found_misleading = _count_hits(misleading_terms, summary_lower, keywords_lower)

    print(f"Correct terms found: {found_correct}/{len(correct_terms)}")
    print(f"Misleading terms found: {found_misleading}/{len(misleading_terms)}")

    if found_correct > found_misleading:
        print("\n[OK] LLM correctly identified actual functionality!")
        print(" LLM ignored misleading comments and analyzed code behavior")
    elif found_misleading > found_correct:
        print("\n[X] LLM was misled by incorrect comments")
        print(" LLM trusted documentation over code analysis")
    else:
        print("\n[~] Mixed results - LLM found both correct and misleading terms")


def main() -> None:
    """Run the demo: show the sample, call the LLM, and report the verdict."""
    _show_test_code()

    print("\n" + "=" * 80)
    print("TESTING LLM UNDERSTANDING")
    print("=" * 80)

    # Test LLM analysis
    config = LLMConfig(enabled=True, tool="gemini", batch_size=1)
    enhancer = LLMEnhancer(config)

    if not enhancer.check_available():
        print("\n[X] CCW CLI not available")
        print("Install: npm install -g ccw")
        # sys.exit instead of the site-provided exit() builtin, which is
        # not guaranteed to exist outside the interactive interpreter.
        sys.exit(1)

    print("\n[Calling Gemini to analyze code...]")
    file_data = FileData(path="db/pool.py", content=TEST_CODE, language="python")

    with tempfile.TemporaryDirectory() as tmpdir:
        result = enhancer.enhance_files([file_data], Path(tmpdir))

    if "db/pool.py" in result:
        _report_analysis(result["db/pool.py"])
    else:
        print("\n[X] LLM analysis failed - no results returned")

    print("\n" + "=" * 80)


if __name__ == "__main__":
    main()
||||
Reference in New Issue
Block a user