Mirror of https://github.com/catlog22/Claude-Code-Workflow.git — synced 2026-02-10 02:24:35 +08:00
- Implement `inspect_llm_summaries.py` to display LLM-generated summaries from the semantic_chunks table in the database.
- Create `show_llm_analysis.py` to demonstrate LLM analysis of misleading code examples, highlighting discrepancies between comments and actual functionality.
- Develop `test_misleading_comments.py` to compare pure vector search with LLM-enhanced search, focusing on the impact of misleading or missing comments on search results.
- Introduce `test_llm_enhanced_search.py` to provide a test suite for evaluating the effectiveness of LLM-enhanced vector search against pure vector search.
- Ensure all new scripts are integrated with the existing codebase and follow the established coding standards.
89 lines · 2.8 KiB · Python
#!/usr/bin/env python3
"""Inspect LLM-generated summaries in semantic_chunks table."""

import sqlite3
import sys
from pathlib import Path
def inspect_summaries(db_path: Path) -> None:
    """Print LLM-generated summaries stored in the semantic_chunks table.

    Walks every row of ``semantic_chunks`` ordered by file path and chunk
    index, printing the LLM summary / keywords / purpose fields extracted
    from the JSON ``metadata`` column, plus a 200-character preview of the
    original chunk content.

    Args:
        db_path: Path to the SQLite index database (e.g. ``_index.db``).
    """
    if not db_path.exists():
        print(f"Error: Database not found: {db_path}")
        return

    sep = "=" * 80  # header separator, built once and reused

    with sqlite3.connect(db_path) as conn:
        # Row factory gives name-based access (row['summary']) below.
        conn.row_factory = sqlite3.Row

        # Bail out early if the expected table is absent (e.g. an older
        # index that was built without semantic chunking).
        cursor = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
        )
        if not cursor.fetchone():
            print("No semantic_chunks table found")
            return

        # Pull every chunk together with the LLM-enrichment fields that
        # live inside the JSON metadata column.
        cursor = conn.execute("""
            SELECT file_path, chunk_index, content,
                   json_extract(metadata, '$.llm_summary') as summary,
                   json_extract(metadata, '$.llm_keywords') as keywords,
                   json_extract(metadata, '$.llm_purpose') as purpose,
                   json_extract(metadata, '$.strategy') as strategy
            FROM semantic_chunks
            ORDER BY file_path, chunk_index
        """)

        chunks = cursor.fetchall()

        if not chunks:
            print("No chunks found in database")
            return

        print(sep)
        print("LLM-GENERATED SUMMARIES INSPECTION")
        print(sep)

        current_file = None
        for chunk in chunks:
            file_path = chunk['file_path']

            # Print a file banner each time we cross into a new file;
            # rows are ordered by file_path so each file appears once.
            if file_path != current_file:
                print(f"\n{sep}")
                print(f"FILE: {file_path}")
                print(sep)
                current_file = file_path

            print(f"\n[Chunk {chunk['chunk_index']}]")
            print(f"Strategy: {chunk['strategy']}")

            # Each LLM field is optional; json_extract yields NULL (None)
            # when the key is missing, so only print sections that exist.
            if chunk['summary']:
                print("\nLLM Summary:")
                print(f" {chunk['summary']}")

            if chunk['keywords']:
                print("\nKeywords:")
                print(f" {chunk['keywords']}")

            if chunk['purpose']:
                print("\nPurpose:")
                print(f" {chunk['purpose']}")

            # Show only a short preview of the raw chunk text.
            content = chunk['content']
            if len(content) > 200:
                content = content[:200] + "..."
            print("\nOriginal Content (first 200 chars):")
            print(f" {content}")
            print("-" * 80)
if __name__ == "__main__":
    # CLI entry point: the index database path is the sole required argument.
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python inspect_llm_summaries.py <path_to_index.db>")
        print("\nExample:")
        print(" python inspect_llm_summaries.py ~/.codexlens/indexes/myproject/_index.db")
        sys.exit(1)

    inspect_summaries(Path(argv[1]))