mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
feat: 增强索引树构建逻辑,支持递归检查子目录中的可索引文件
This commit is contained in:
@@ -535,10 +535,15 @@ def generate_embeddings(
|
||||
|
||||
# skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
|
||||
# This significantly reduces CPU usage with minimal impact on metadata accuracy
|
||||
# Load chunk stripping config from settings
|
||||
from codexlens.config import Config
|
||||
chunk_cfg = Config.load()
|
||||
chunker = Chunker(config=ChunkConfig(
|
||||
max_chunk_size=chunk_size,
|
||||
overlap=overlap,
|
||||
skip_token_count=True
|
||||
skip_token_count=True,
|
||||
strip_comments=getattr(chunk_cfg, 'chunk_strip_comments', True),
|
||||
strip_docstrings=getattr(chunk_cfg, 'chunk_strip_docstrings', True),
|
||||
))
|
||||
|
||||
# Log embedder info with endpoint count for multi-endpoint mode
|
||||
@@ -1307,10 +1312,15 @@ def generate_dense_embeddings_centralized(
|
||||
"error": f"Invalid embedding backend: {embedding_backend}",
|
||||
}
|
||||
|
||||
# Load chunk stripping config from settings
|
||||
from codexlens.config import Config
|
||||
chunk_cfg = Config.load()
|
||||
chunker = Chunker(config=ChunkConfig(
|
||||
max_chunk_size=chunk_size,
|
||||
overlap=overlap,
|
||||
skip_token_count=True
|
||||
skip_token_count=True,
|
||||
strip_comments=getattr(chunk_cfg, 'chunk_strip_comments', True),
|
||||
strip_docstrings=getattr(chunk_cfg, 'chunk_strip_docstrings', True),
|
||||
))
|
||||
|
||||
if progress_callback:
|
||||
@@ -1319,8 +1329,7 @@ def generate_dense_embeddings_centralized(
|
||||
progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")
|
||||
|
||||
# Calculate dynamic batch size based on model capacity
|
||||
from codexlens.config import Config
|
||||
batch_config = Config.load()
|
||||
batch_config = chunk_cfg # Reuse already loaded config
|
||||
effective_batch_size = calculate_dynamic_batch_size(batch_config, embedder)
|
||||
|
||||
if progress_callback and batch_config.api_batch_size_dynamic:
|
||||
|
||||
@@ -412,7 +412,8 @@ class IndexTreeBuilder:
|
||||
A directory is indexed if:
|
||||
1. It's not in IGNORE_DIRS
|
||||
2. It doesn't start with '.'
|
||||
3. It contains at least one supported language file
|
||||
3. It contains at least one supported language file, OR
|
||||
4. It has subdirectories that contain supported files (transitive)
|
||||
|
||||
Args:
|
||||
dir_path: Directory to check
|
||||
@@ -427,7 +428,50 @@ class IndexTreeBuilder:
|
||||
|
||||
# Check for supported files in this directory
|
||||
source_files = self._iter_source_files(dir_path, languages)
|
||||
return len(source_files) > 0
|
||||
if len(source_files) > 0:
|
||||
return True
|
||||
|
||||
# Check if any subdirectory has indexable files (transitive)
|
||||
# This handles cases like 'src' which has no direct files but has 'src/codexlens'
|
||||
for item in dir_path.iterdir():
|
||||
if not item.is_dir():
|
||||
continue
|
||||
if item.name in self.IGNORE_DIRS or item.name.startswith("."):
|
||||
continue
|
||||
# Recursively check subdirectories
|
||||
if self._has_indexable_files_recursive(item, languages):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _has_indexable_files_recursive(self, dir_path: Path, languages: List[str] = None) -> bool:
    """Check if directory or any subdirectory has indexable files.

    The directory itself is checked first via ``_iter_source_files``; if it
    has no matching files, its subdirectories are searched depth-first.
    Subdirectories whose name is in ``IGNORE_DIRS`` or starts with ``'.'``
    are never descended into.

    Filesystem errors are handled per directory: a directory (or entry)
    that cannot be read is treated as containing nothing indexable, and
    the search continues with its siblings. Previously an error raised
    while scanning one subdirectory aborted the scan of the parent, so the
    result depended on directory listing order.

    Args:
        dir_path: Directory to check
        languages: Optional language filter

    Returns:
        True if directory tree contains indexable files
    """
    try:
        # Direct hit: this directory itself contains supported files.
        if self._iter_source_files(dir_path, languages):
            return True
        # Materialize the listing so that errors raised while *listing*
        # are kept separate from errors raised while visiting children.
        entries = list(dir_path.iterdir())
    except OSError:
        # Unreadable or vanished directory: nothing indexable here.
        return False

    for item in entries:
        if item.name in self.IGNORE_DIRS or item.name.startswith("."):
            continue
        try:
            if not item.is_dir():
                continue
        except OSError:
            # Entry cannot be stat'ed; skip it but keep checking siblings.
            continue
        if self._has_indexable_files_recursive(item, languages):
            return True

    return False
|
||||
|
||||
def _build_level_parallel(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user