Add scripts for inspecting LLM summaries and testing misleading comments

- Implement `inspect_llm_summaries.py` to display LLM-generated summaries from the semantic_chunks table in the database.
- Create `show_llm_analysis.py` to demonstrate LLM analysis of misleading code examples, highlighting discrepancies between comments and actual functionality.
- Develop `test_misleading_comments.py` to compare pure vector search with LLM-enhanced search, focusing on the impact of misleading or missing comments on search results.
- Introduce `test_llm_enhanced_search.py` to provide a test suite for evaluating the effectiveness of LLM-enhanced vector search against pure vector search.
- Ensure all new scripts are integrated with the existing codebase and follow the established coding standards.
2026-02-13 02:41:50 +08:00 · 2025-12-16 20:29:28 +08:00
parent df23975a0b
commit d21066c282
14 changed files with 3170 additions and 57 deletions
--- a/codex-lens/scripts/inspect_llm_summaries.py
+++ b/codex-lens/scripts/inspect_llm_summaries.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+"""Inspect LLM-generated summaries in semantic_chunks table."""
+
+import sqlite3
+import sys
+from pathlib import Path
+
def inspect_summaries(db_path: Path) -> None:
    """Print the LLM-generated metadata stored for every semantic chunk.

    Reads the ``semantic_chunks`` table of the SQLite database at *db_path*
    and prints, grouped by file, each chunk's strategy plus its LLM summary,
    keywords and purpose (each only when present in the JSON ``metadata``
    column), followed by a preview of at most 200 characters of the chunk
    content.

    Args:
        db_path: Location of the SQLite index database. A plain string is
            accepted as well and converted to ``Path``.

    Returns:
        None. All output -- including the diagnostics for a missing
        database file, a missing table or an empty table -- goes to stdout.
    """
    db_path = Path(db_path)

    # sqlite3.connect() would silently create an empty database file, so
    # bail out before connecting when the file does not exist.
    if not db_path.exists():
        print(f"Error: Database not found: {db_path}")
        return

    # A sqlite3 connection used as a context manager only scopes a
    # transaction; it does not close the connection. Close it explicitly so
    # the file handle is released as soon as the inspection is done.
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row

        # Check if semantic_chunks table exists
        cursor = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
        )
        if not cursor.fetchone():
            print("No semantic_chunks table found")
            return

        # Get all chunks together with the LLM fields pulled out of the
        # JSON metadata; json_extract yields NULL for absent keys.
        chunks = conn.execute("""
            SELECT file_path, chunk_index, content,
                   json_extract(metadata, '$.llm_summary') as summary,
                   json_extract(metadata, '$.llm_keywords') as keywords,
                   json_extract(metadata, '$.llm_purpose') as purpose,
                   json_extract(metadata, '$.strategy') as strategy
            FROM semantic_chunks
            ORDER BY file_path, chunk_index
        """).fetchall()
    finally:
        conn.close()

    if not chunks:
        print("No chunks found in database")
        return

    print("=" * 80)
    print("LLM-GENERATED SUMMARIES INSPECTION")
    print("=" * 80)

    # Rows are ordered by file_path, so a change of path marks a new file.
    current_file = None
    for chunk in chunks:
        file_path = chunk['file_path']
        if file_path != current_file:
            print(f"\n{'=' * 80}")
            print(f"FILE: {file_path}")
            print(f"{'=' * 80}")
            current_file = file_path

        _print_chunk(chunk)


def _print_chunk(chunk) -> None:
    """Print one chunk row: strategy, optional LLM fields, content preview."""
    print(f"\n[Chunk {chunk['chunk_index']}]")
    print(f"Strategy: {chunk['strategy']}")

    # Each LLM field is printed only when it is present and non-empty.
    for heading, column in (
        ("LLM Summary", 'summary'),
        ("Keywords", 'keywords'),
        ("Purpose", 'purpose'),
    ):
        value = chunk[column]
        if value:
            print(f"\n{heading}:")
            print(f"  {value}")

    # Show first 200 chars of content; a NULL content column is treated as
    # empty text instead of crashing on len(None).
    content = chunk['content'] or ""
    if len(content) > 200:
        content = content[:200] + "..."
    print("\nOriginal Content (first 200 chars):")
    print(f"  {content}")
    print("-" * 80)
+
+
if __name__ == "__main__":
    # The database path is the one required positional argument; without it,
    # show the usage text and exit with a non-zero status.
    cli_args = sys.argv[1:]
    if not cli_args:
        for usage_line in (
            "Usage: python inspect_llm_summaries.py <path_to_index.db>",
            "\nExample:",
            "  python inspect_llm_summaries.py ~/.codexlens/indexes/myproject/_index.db",
        ):
            print(usage_line)
        sys.exit(1)

    inspect_summaries(Path(cli_args[0]))