Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-10 02:24:35 +08:00
fix: CodexLens model detection, hybrid search stability, and JSON logging
- Fix model installation detection using fastembed ONNX cache names
- Add embeddings_config table for model metadata tracking
- Fix hybrid search segfault by using single-threaded GPU mode
- Suppress INFO logs in JSON mode to prevent error display
- Add model dropdown filtering to show only installed models

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
@@ -35,8 +35,17 @@ from .output import (
 
 app = typer.Typer(help="CodexLens CLI — local code indexing and search.")
 
 
-def _configure_logging(verbose: bool) -> None:
-    level = logging.DEBUG if verbose else logging.INFO
+def _configure_logging(verbose: bool, json_mode: bool = False) -> None:
+    """Configure logging level.
+
+    In JSON mode, suppress INFO logs to keep stderr clean for error parsing.
+    Only WARNING and above are shown to avoid mixing logs with JSON output.
+    """
+    if json_mode and not verbose:
+        # In JSON mode, suppress INFO logs to keep stderr clean
+        level = logging.WARNING
+    else:
+        level = logging.DEBUG if verbose else logging.INFO
     logging.basicConfig(level=level, format="%(levelname)s %(message)s")
 
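The effect of the new json_mode branch is easiest to see in isolation. Below is a minimal standalone sketch reusing the function body from the hunk above (not repository code, just the same pattern run directly):

    import logging

    def _configure_logging(verbose: bool, json_mode: bool = False) -> None:
        if json_mode and not verbose:
            level = logging.WARNING  # JSON mode: keep stderr clean
        else:
            level = logging.DEBUG if verbose else logging.INFO
        logging.basicConfig(level=level, format="%(levelname)s %(message)s")

    _configure_logging(verbose=False, json_mode=True)
    logging.info("indexing 120 files")        # suppressed: below WARNING
    logging.warning("schema version is old")  # still printed to stderr

Note that --verbose still wins: with verbose=True the suppression is bypassed, which matches the `json_mode and not verbose` condition.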
@@ -95,7 +104,7 @@ def init(
     If semantic search dependencies are installed, automatically generates embeddings
     after indexing completes. Use --no-embeddings to skip this step.
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
     config = Config()
     languages = _parse_languages(language)
     base_path = path.expanduser().resolve()
@@ -314,7 +323,7 @@ def search(
         # Force hybrid mode
         codexlens search "authentication" --mode hybrid
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
     search_path = path.expanduser().resolve()
 
     # Validate mode
@@ -487,7 +496,7 @@ def symbol(
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
     """Look up symbols by name and optional kind."""
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
     search_path = path.expanduser().resolve()
 
     registry: RegistryStore | None = None
@@ -538,7 +547,7 @@ def inspect(
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
     """Analyze a single file and display symbols."""
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
     config = Config()
     factory = ParserFactory(config)
 
@@ -588,7 +597,7 @@ def status(
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
     """Show index status and configuration."""
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
 
     registry: RegistryStore | None = None
     try:
@@ -648,7 +657,7 @@ def status(
             # Embedding manager not available
             pass
         except Exception as e:
-            logger.debug(f"Failed to get embeddings status: {e}")
+            logging.debug(f"Failed to get embeddings status: {e}")
 
     stats = {
         "index_root": str(index_root),
@@ -737,7 +746,7 @@ def projects(
     - show <path>: Show details for a specific project
     - remove <path>: Remove a project from the registry
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
 
     registry: RegistryStore | None = None
     try:
@@ -892,7 +901,7 @@ def config(
     Config keys:
     - index_dir: Directory to store indexes (default: ~/.codexlens/indexes)
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
 
     config_file = Path.home() / ".codexlens" / "config.json"
 
@@ -1057,7 +1066,7 @@ def migrate(
     This is a safe operation that preserves all existing data.
     Progress is shown during migration.
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
     base_path = path.expanduser().resolve()
 
     registry: RegistryStore | None = None
@@ -1183,7 +1192,7 @@ def clean(
     With path, removes that project's indexes.
     With --all, removes all indexes (use with caution).
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
 
     try:
         mapper = PathMapper()
@@ -1329,7 +1338,7 @@ def semantic_list(
     Shows files that have LLM-generated summaries and keywords.
     Results are aggregated from all index databases in the project.
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
     base_path = path.expanduser().resolve()
 
     registry: Optional[RegistryStore] = None
@@ -1798,7 +1807,7 @@ def embeddings_generate(
         codexlens embeddings-generate ~/.codexlens/indexes/project/_index.db  # Specific index
         codexlens embeddings-generate ~/projects/my-app --model fast --force  # Regenerate with fast model
     """
-    _configure_logging(verbose)
+    _configure_logging(verbose, json_mode)
 
     from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive
 
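Every command now forwards json_mode, so scripts can treat stdout as pure JSON and stderr as diagnostics only. A hedged sketch of the consuming side, assuming the json_mode parameter is exposed as a --json flag (the option declaration is outside these hunks):

    import json
    import subprocess

    # Assumption: "--json" is the CLI flag bound to the json_mode parameter.
    proc = subprocess.run(
        ["codexlens", "search", "authentication", "--json"],
        capture_output=True,
        text=True,
    )
    results = json.loads(proc.stdout)   # stdout: JSON only
    for line in proc.stderr.splitlines():
        # stderr: WARNING and above only, safe to surface as diagnostics
        print("log:", line)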
@@ -279,6 +279,21 @@ def generate_embeddings(
 
     try:
         with VectorStore(index_path) as vector_store:
+            # Check model compatibility with existing embeddings
+            if not force:
+                is_compatible, warning = vector_store.check_model_compatibility(
+                    model_profile, embedder.model_name, embedder.embedding_dim
+                )
+                if not is_compatible:
+                    return {
+                        "success": False,
+                        "error": warning,
+                    }
+
+            # Set/update model configuration for this index
+            vector_store.set_model_config(
+                model_profile, embedder.model_name, embedder.embedding_dim
+            )
             # Use bulk insert mode for efficient batch ANN index building
             # This defers ANN updates until end_bulk_insert() is called
             with vector_store.bulk_insert():
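The compatibility guard matters because vectors from different models are not comparable: a 384-dimension query embedding cannot be searched against a 768-dimension index. The diff only shows the call site; a hypothetical sketch of the kind of check VectorStore.check_model_compatibility could perform (illustrative, not the repository implementation):

    from typing import Optional, Tuple

    def check_model_compatibility(
        stored: Optional[dict],  # hypothetical row from the embeddings_config table
        profile: str,
        model_name: str,
        embedding_dim: int,
    ) -> Tuple[bool, Optional[str]]:
        """Return (is_compatible, warning). Illustrative sketch only."""
        if stored is None:
            return True, None  # fresh index: any model is acceptable
        if stored["model_name"] != model_name or stored["embedding_dim"] != embedding_dim:
            return False, (
                f"Index built with {stored['model_name']} ({stored['embedding_dim']}d) "
                f"but profile '{profile}' uses {model_name} ({embedding_dim}d); "
                f"pass --force to regenerate."
            )
        return True, None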
@@ -16,9 +16,11 @@ except ImportError:
 # Model profiles with metadata
 # Note: 768d is max recommended dimension for optimal performance/quality balance
 # 1024d models are available but not recommended due to higher resource usage
+# cache_name: The actual Hugging Face repo name used by fastembed for ONNX caching
 MODEL_PROFILES = {
     "fast": {
         "model_name": "BAAI/bge-small-en-v1.5",
+        "cache_name": "qdrant/bge-small-en-v1.5-onnx-q",  # fastembed uses ONNX version
         "dimensions": 384,
         "size_mb": 80,
         "description": "Fast, lightweight, English-optimized",
@@ -27,6 +29,7 @@ MODEL_PROFILES = {
     },
     "base": {
         "model_name": "BAAI/bge-base-en-v1.5",
+        "cache_name": "qdrant/bge-base-en-v1.5-onnx-q",  # fastembed uses ONNX version
         "dimensions": 768,
         "size_mb": 220,
         "description": "General purpose, good balance of speed and quality",
@@ -35,6 +38,7 @@ MODEL_PROFILES = {
     },
     "code": {
         "model_name": "jinaai/jina-embeddings-v2-base-code",
+        "cache_name": "jinaai/jina-embeddings-v2-base-code",  # Uses original name
         "dimensions": 768,
         "size_mb": 150,
         "description": "Code-optimized, best for programming languages",
@@ -43,6 +47,7 @@ MODEL_PROFILES = {
     },
     "minilm": {
         "model_name": "sentence-transformers/all-MiniLM-L6-v2",
+        "cache_name": "qdrant/all-MiniLM-L6-v2-onnx",  # fastembed uses ONNX version
         "dimensions": 384,
         "size_mb": 90,
         "description": "Popular lightweight model, good quality",
@@ -51,6 +56,7 @@ MODEL_PROFILES = {
     },
     "multilingual": {
         "model_name": "intfloat/multilingual-e5-large",
+        "cache_name": "qdrant/multilingual-e5-large-onnx",  # fastembed uses ONNX version
         "dimensions": 1024,
         "size_mb": 1000,
         "description": "Multilingual + code support (high resource usage)",
@@ -59,6 +65,7 @@ MODEL_PROFILES = {
     },
     "balanced": {
         "model_name": "mixedbread-ai/mxbai-embed-large-v1",
+        "cache_name": "mixedbread-ai/mxbai-embed-large-v1",  # Uses original name
         "dimensions": 1024,
         "size_mb": 600,
         "description": "High accuracy, general purpose (high resource usage)",
@@ -87,6 +94,23 @@ def get_cache_dir() -> Path:
     return cache_dir
 
 
+def _get_model_cache_path(cache_dir: Path, info: Dict) -> Path:
+    """Get the actual cache path for a model.
+
+    fastembed uses ONNX versions of models with different names than the original.
+    This function returns the correct path based on the cache_name field.
+
+    Args:
+        cache_dir: The fastembed cache directory
+        info: Model profile info dictionary
+
+    Returns:
+        Path to the model cache directory
+    """
+    cache_name = info.get("cache_name", info["model_name"])
+    return cache_dir / f"models--{cache_name.replace('/', '--')}"
+
+
 def list_models() -> Dict[str, any]:
     """List available model profiles and their installation status.
 
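A concrete demonstration of why the helper fixes detection: for the "fast" profile, fastembed caches the qdrant ONNX repo, so a lookup keyed on model_name never matches. Using the profile data from the hunks above (the cache directory shown is an assumption; the real value comes from get_cache_dir()):

    from pathlib import Path

    cache_dir = Path.home() / ".cache" / "fastembed"  # assumed location
    info = {
        "model_name": "BAAI/bge-small-en-v1.5",
        "cache_name": "qdrant/bge-small-en-v1.5-onnx-q",
    }

    cache_name = info.get("cache_name", info["model_name"])
    print(cache_dir / f"models--{cache_name.replace('/', '--')}")
    # .../models--qdrant--bge-small-en-v1.5-onnx-q  (what fastembed actually writes)
    print(cache_dir / f"models--{info['model_name'].replace('/', '--')}")
    # .../models--BAAI--bge-small-en-v1.5  (old lookup: never exists, so "not installed")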
@@ -106,13 +130,13 @@ def list_models() -> Dict[str, any]:
     for profile, info in MODEL_PROFILES.items():
         model_name = info["model_name"]
 
-        # Check if model is cached
+        # Check if model is cached using the actual cache name
         installed = False
         cache_size_mb = 0
 
         if cache_exists:
-            # Check for model directory in cache
-            model_cache_path = cache_dir / f"models--{model_name.replace('/', '--')}"
+            # Check for model directory in cache using correct cache_name
+            model_cache_path = _get_model_cache_path(cache_dir, info)
             if model_cache_path.exists():
                 installed = True
                 # Calculate cache size
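The hunk is cut off just as the size calculation starts. One plausible way to total a model directory, shown only as a sketch since the actual implementation is outside this diff:

    from pathlib import Path

    def _dir_size_mb(path: Path) -> float:
        """Sum all file sizes under a cache directory (illustrative sketch)."""
        total = sum(f.stat().st_size for f in path.rglob("*") if f.is_file())
        return total / (1024 * 1024)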
@@ -166,7 +190,8 @@ def download_model(profile: str, progress_callback: Optional[callable] = None) -
             "error": f"Unknown profile: {profile}. Available: {', '.join(MODEL_PROFILES.keys())}",
         }
 
-    model_name = MODEL_PROFILES[profile]["model_name"]
+    info = MODEL_PROFILES[profile]
+    model_name = info["model_name"]
 
     try:
         # Download model by instantiating TextEmbedding
@@ -179,9 +204,9 @@ def download_model(profile: str, progress_callback: Optional[callable] = None) -
         if progress_callback:
             progress_callback(f"Model {model_name} downloaded successfully")
 
-        # Get cache info
+        # Get cache info using correct cache_name
         cache_dir = get_cache_dir()
-        model_cache_path = cache_dir / f"models--{model_name.replace('/', '--')}"
+        model_cache_path = _get_model_cache_path(cache_dir, info)
 
         cache_size = 0
         if model_cache_path.exists():
@@ -224,9 +249,10 @@ def delete_model(profile: str) -> Dict[str, any]:
             "error": f"Unknown profile: {profile}. Available: {', '.join(MODEL_PROFILES.keys())}",
         }
 
-    model_name = MODEL_PROFILES[profile]["model_name"]
+    info = MODEL_PROFILES[profile]
+    model_name = info["model_name"]
     cache_dir = get_cache_dir()
-    model_cache_path = cache_dir / f"models--{model_name.replace('/', '--')}"
+    model_cache_path = _get_model_cache_path(cache_dir, info)
 
     if not model_cache_path.exists():
         return {
@@ -281,9 +307,9 @@ def get_model_info(profile: str) -> Dict[str, any]:
     info = MODEL_PROFILES[profile]
     model_name = info["model_name"]
 
-    # Check installation status
+    # Check installation status using correct cache_name
     cache_dir = get_cache_dir()
-    model_cache_path = cache_dir / f"models--{model_name.replace('/', '--')}"
+    model_cache_path = _get_model_cache_path(cache_dir, info)
     installed = model_cache_path.exists()
 
     cache_size_mb = None