Files
Claude-Code-Workflow/codex-lens/src/codexlens/config.py
catlog22 0fe16963cd Add comprehensive tests for tokenizer, performance benchmarks, and TreeSitter parser functionality
- Implemented unit tests for the Tokenizer class, covering various text inputs, edge cases, and fallback mechanisms.
- Created performance benchmarks comparing tiktoken and pure Python implementations for token counting.
- Developed extensive tests for TreeSitterSymbolParser across Python, JavaScript, and TypeScript, ensuring accurate symbol extraction and parsing.
- Added configuration documentation for MCP integration and custom prompts, enhancing usability and flexibility.
- Introduced a refactor script for GraphAnalyzer to streamline future improvements.
2025-12-15 14:36:09 +08:00

199 lines
6.8 KiB
Python

"""Configuration system for CodexLens."""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from .errors import ConfigError
# Workspace-local directory name
WORKSPACE_DIR_NAME = ".codexlens"
def _default_global_dir() -> Path:
"""Get global CodexLens data directory."""
env_override = os.getenv("CODEXLENS_DATA_DIR")
if env_override:
return Path(env_override).expanduser().resolve()
return (Path.home() / ".codexlens").resolve()
def find_workspace_root(start_path: Path) -> Optional[Path]:
"""Find the workspace root by looking for .codexlens directory.
Searches from start_path upward to find an existing .codexlens directory.
Returns None if not found.
"""
current = start_path.resolve()
# Search up to filesystem root
while current != current.parent:
workspace_dir = current / WORKSPACE_DIR_NAME
if workspace_dir.is_dir():
return current
current = current.parent
# Check root as well
workspace_dir = current / WORKSPACE_DIR_NAME
if workspace_dir.is_dir():
return current
return None
@dataclass
class Config:
"""Runtime configuration for CodexLens.
- data_dir: Base directory for all persistent CodexLens data.
- venv_path: Optional virtualenv used for language tooling.
- supported_languages: Language IDs and their associated file extensions.
- parsing_rules: Per-language parsing and chunking hints.
"""
data_dir: Path = field(default_factory=_default_global_dir)
venv_path: Path = field(default_factory=lambda: _default_global_dir() / "venv")
supported_languages: Dict[str, Dict[str, Any]] = field(
default_factory=lambda: {
"python": {"extensions": [".py"], "tree_sitter_language": "python"},
"javascript": {"extensions": [".js", ".jsx"], "tree_sitter_language": "javascript"},
"typescript": {"extensions": [".ts", ".tsx"], "tree_sitter_language": "typescript"},
"java": {"extensions": [".java"], "tree_sitter_language": "java"},
"go": {"extensions": [".go"], "tree_sitter_language": "go"},
"zig": {"extensions": [".zig"], "tree_sitter_language": "zig"},
"objective-c": {"extensions": [".m", ".mm"], "tree_sitter_language": "objc"},
}
)
parsing_rules: Dict[str, Dict[str, Any]] = field(
default_factory=lambda: {
"default": {
"max_chunk_chars": 4000,
"max_chunk_lines": 200,
"overlap_lines": 20,
}
}
)
llm_enabled: bool = False
llm_tool: str = "gemini"
llm_timeout_ms: int = 300000
llm_batch_size: int = 5
# Hybrid chunker configuration
hybrid_max_chunk_size: int = 2000 # Max characters per chunk before LLM refinement
hybrid_llm_refinement: bool = False # Enable LLM-based semantic boundary refinement
def __post_init__(self) -> None:
try:
self.data_dir = self.data_dir.expanduser().resolve()
self.venv_path = self.venv_path.expanduser().resolve()
self.data_dir.mkdir(parents=True, exist_ok=True)
except Exception as exc:
raise ConfigError(f"Failed to initialize data_dir at {self.data_dir}: {exc}") from exc
@property
def cache_dir(self) -> Path:
"""Directory for transient caches."""
return self.data_dir / "cache"
@property
def index_dir(self) -> Path:
"""Directory where index artifacts are stored."""
return self.data_dir / "index"
@property
def db_path(self) -> Path:
"""Default SQLite index path."""
return self.index_dir / "codexlens.db"
def ensure_runtime_dirs(self) -> None:
"""Create standard runtime directories if missing."""
for directory in (self.cache_dir, self.index_dir):
try:
directory.mkdir(parents=True, exist_ok=True)
except Exception as exc:
raise ConfigError(f"Failed to create directory {directory}: {exc}") from exc
def language_for_path(self, path: str | Path) -> str | None:
"""Infer a supported language ID from a file path."""
extension = Path(path).suffix.lower()
for language_id, spec in self.supported_languages.items():
extensions: List[str] = spec.get("extensions", [])
if extension in extensions:
return language_id
return None
def rules_for_language(self, language_id: str) -> Dict[str, Any]:
"""Get parsing rules for a specific language, falling back to defaults."""
return {**self.parsing_rules.get("default", {}), **self.parsing_rules.get(language_id, {})}
@dataclass
class WorkspaceConfig:
"""Workspace-local configuration for CodexLens.
Stores index data in project/.codexlens/ directory.
"""
workspace_root: Path
def __post_init__(self) -> None:
self.workspace_root = Path(self.workspace_root).resolve()
@property
def codexlens_dir(self) -> Path:
"""The .codexlens directory in workspace root."""
return self.workspace_root / WORKSPACE_DIR_NAME
@property
def db_path(self) -> Path:
"""SQLite index path for this workspace."""
return self.codexlens_dir / "index.db"
@property
def cache_dir(self) -> Path:
"""Cache directory for this workspace."""
return self.codexlens_dir / "cache"
def initialize(self) -> None:
"""Create the .codexlens directory structure."""
try:
self.codexlens_dir.mkdir(parents=True, exist_ok=True)
self.cache_dir.mkdir(parents=True, exist_ok=True)
# Create .gitignore to exclude cache but keep index
gitignore_path = self.codexlens_dir / ".gitignore"
if not gitignore_path.exists():
gitignore_path.write_text(
"# CodexLens workspace data\n"
"cache/\n"
"*.log\n"
)
except Exception as exc:
raise ConfigError(f"Failed to initialize workspace at {self.codexlens_dir}: {exc}") from exc
def exists(self) -> bool:
"""Check if workspace is already initialized."""
return self.codexlens_dir.is_dir() and self.db_path.exists()
@classmethod
def from_path(cls, path: Path) -> Optional["WorkspaceConfig"]:
"""Create WorkspaceConfig from a path by finding workspace root.
Returns None if no workspace found.
"""
root = find_workspace_root(path)
if root is None:
return None
return cls(workspace_root=root)
@classmethod
def create_at(cls, path: Path) -> "WorkspaceConfig":
"""Create a new workspace at the given path."""
config = cls(workspace_root=path)
config.initialize()
return config