mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
Previously, embeddings were only generated for root directory files (1.6% coverage, 5/303 files). This fix implements recursive processing across all subdirectory indexes, achieving 100% coverage with 2,042 semantic chunks across all 303 files in 26 index databases.

Key improvements:

1. **Recursive embeddings generation** (embedding_manager.py):
   - Add generate_embeddings_recursive() to process all _index.db files in directory tree
   - Add get_embeddings_status() for comprehensive coverage statistics
   - Add discover_all_index_dbs() helper for recursive file discovery

2. **Enhanced CLI commands** (commands.py):
   - embeddings-generate: Add --recursive flag for full project coverage
   - init: Use recursive generation by default for complete indexing
   - status: Display embeddings coverage statistics with 50% threshold

3. **Smart search routing improvements** (smart-search.ts):
   - Add 50% embeddings coverage threshold for hybrid mode routing
   - Auto-fallback to exact mode when coverage insufficient
   - Strip ANSI color codes from JSON output for correct parsing
   - Add embeddings_coverage_percent to IndexStatus and SearchMetadata
   - Provide clear warnings with actionable suggestions

4. **Documentation and analysis**:
   - Add SMART_SEARCH_ANALYSIS.md with initial investigation
   - Add SMART_SEARCH_CORRECTED_ANALYSIS.md revealing true extent of issue
   - Add EMBEDDINGS_FIX_SUMMARY.md with complete fix summary
   - Add check_embeddings.py script for coverage verification

Results:
- Coverage improved from 1.6% (5/303 files) to 100% (303/303 files) - 62.5x increase
- Semantic chunks increased from 10 to 2,042 - 204x increase
- All 26 subdirectory indexes now have embeddings vs just 1

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
48 lines
1.6 KiB
Python
48 lines
1.6 KiB
Python
"""Report embeddings coverage across a tree of ``_index.db`` databases.

For every ``_index.db`` found under the root directory, print how many rows
its ``files`` table holds and how many rows its ``semantic_chunks`` table
holds (if that table exists), followed by overall totals.

Usage:
    python check_embeddings.py [ROOT_DIR]

When ROOT_DIR is omitted, ``DEFAULT_ROOT_DIR`` is used.
"""
import os
import sqlite3
import sys

# Index tree inspected when no directory is given on the command line.
DEFAULT_ROOT_DIR = r'C:\Users\dyw\.codexlens\indexes\D\Claude_dms3\ccw'
INDEX_DB_NAME = '_index.db'


def find_index_dbs(root_dir):
    """Return the sorted paths of every ``_index.db`` file under *root_dir*."""
    return sorted(
        os.path.join(dirpath, INDEX_DB_NAME)
        for dirpath, _dirnames, filenames in os.walk(root_dir)
        if INDEX_DB_NAME in filenames
    )


def count_rows(db_path):
    """Count the rows of the ``files`` and ``semantic_chunks`` tables.

    Args:
        db_path: Path of the SQLite database to inspect.

    Returns:
        A ``(file_count, chunk_count)`` tuple. ``chunk_count`` is ``None``
        when querying ``semantic_chunks`` raises ``sqlite3.OperationalError``
        (in practice: the table does not exist).

    Raises:
        sqlite3.Error: If the database cannot be opened or the ``files``
            table cannot be queried.
    """
    conn = sqlite3.connect(db_path)
    try:
        file_count = conn.execute('SELECT COUNT(*) FROM files').fetchone()[0]
        try:
            chunk_count = conn.execute(
                'SELECT COUNT(*) FROM semantic_chunks'
            ).fetchone()[0]
        except sqlite3.OperationalError:
            chunk_count = None
        return file_count, chunk_count
    finally:
        conn.close()


def report(root_dir=DEFAULT_ROOT_DIR):
    """Print a per-database coverage line and a summary for *root_dir*.

    Args:
        root_dir: Directory that is walked recursively for ``_index.db``.

    Returns:
        A dict with the totals that are printed in the summary:
        ``databases``, ``dirs_with_chunks``, ``files`` and ``chunks``.
    """
    index_files = find_index_dbs(root_dir)
    print(f'Found {len(index_files)} index databases\n')

    total_files = 0
    total_chunks = 0
    dirs_with_chunks = 0

    for db_path in index_files:
        # relpath strips the root reliably; the previous str.replace() used a
        # raw-string pattern ending in two backslashes, which never matched,
        # so the full absolute path was printed instead.
        rel_path = os.path.relpath(db_path, root_dir)

        try:
            file_count, chunk_count = count_rows(db_path)
        except Exception as e:
            # Best-effort report: one unreadable database must not abort
            # the scan of the remaining ones.
            print(f'[!] {rel_path:<40} Error: {e}')
            continue

        total_files += file_count

        if chunk_count is None:
            print(f'[ ] {rel_path:<40} Files: {file_count:3d} (no semantic_chunks table)')
        elif chunk_count > 0:
            total_chunks += chunk_count
            dirs_with_chunks += 1
            print(f'[+] {rel_path:<40} Files: {file_count:3d} Chunks: {chunk_count:3d}')
        else:
            print(f'[ ] {rel_path:<40} Files: {file_count:3d} (no chunks)')

    print('\n=== Summary ===')
    print(f'Total index databases: {len(index_files)}')
    print(f'Directories with embeddings: {dirs_with_chunks}')
    print(f'Total files indexed: {total_files}')
    print(f'Total semantic chunks: {total_chunks}')

    return {
        'databases': len(index_files),
        'dirs_with_chunks': dirs_with_chunks,
        'files': total_files,
        'chunks': total_chunks,
    }


def main(argv=None):
    """Run the report for ``argv[1]`` if given, else ``DEFAULT_ROOT_DIR``."""
    argv = sys.argv if argv is None else argv
    root_dir = argv[1] if len(argv) > 1 else DEFAULT_ROOT_DIR
    report(root_dir)


if __name__ == '__main__':
    main()