chore: move ccw-skill-hub to standalone repository

Migrated ccw-skill-hub to D:/ccw-skill-hub as independent git project.
Removed nested git repos (ccw/frontend/ccw-skill-hub, skill-hub-repo, skill-hub-temp).
Author: catlog22
Date: 2026-02-24 11:57:26 +08:00
parent 6f0bbe84ea
commit 61e313a0c1
35 changed files with 3189 additions and 362 deletions

codex-lens/src/.gitignore (vendored, new file)

@@ -0,0 +1 @@
+.ace-tool/

codex-lens/src/codexlens/cli/embedding_manager.py

@@ -1,4 +1,42 @@
"""Embedding Manager - Manage semantic embeddings for code indexes."""
"""Embedding Manager - Manage semantic embeddings for code indexes.
This module provides functions for generating and managing semantic embeddings
for code indexes, supporting both fastembed and litellm backends.
Example Usage:
Generate embeddings for a single index:
>>> from pathlib import Path
>>> from codexlens.cli.embedding_manager import generate_embeddings
>>> result = generate_embeddings(
... index_path=Path("path/to/_index.db"),
... force=True
... )
>>> if result["success"]:
... print(f"Generated {result['total_chunks_created']} embeddings")
Generate embeddings for an entire project with centralized index:
>>> from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized
>>> result = generate_dense_embeddings_centralized(
... index_root=Path("path/to/project"),
... force=True,
... progress_callback=lambda msg: print(msg)
... )
Check if embeddings exist:
>>> from codexlens.cli.embedding_manager import check_index_embeddings
>>> status = check_index_embeddings(Path("path/to/_index.db"))
>>> print(status["result"]["has_embeddings"])
Backward Compatibility:
The deprecated `discover_all_index_dbs()` function is maintained for compatibility.
`generate_embeddings_recursive()` is deprecated but functional; use
`generate_dense_embeddings_centralized()` instead.
The `EMBEDDING_BATCH_SIZE` constant is kept as a reference but actual batch size
is calculated dynamically via `calculate_dynamic_batch_size()`.
"""
import gc
import json
@@ -53,11 +91,11 @@ def calculate_dynamic_batch_size(config, embedder) -> int:
     - Utilization factor (default 80% to leave headroom)
 
     Args:
-        config: Config object with api_batch_size_* settings
-        embedder: Embedding model object with max_tokens property
+        config: Config object with api_batch_size_* settings.
+        embedder: Embedding model object with max_tokens property.
 
     Returns:
-        Calculated batch size, clamped to [1, api_batch_size_max]
+        int: Calculated batch size, clamped to [1, api_batch_size_max].
     """
     # If dynamic calculation is disabled, return static value
     if not getattr(config, 'api_batch_size_dynamic', False):
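
The sizing rule above fits as many average-sized chunks as the model window allows, keeping 20% headroom. A minimal sketch of the arithmetic, with hypothetical inputs and a hypothetical default cap (the shipped calculation reads its bounds from the api_batch_size_* config fields):

    def sketch_dynamic_batch_size(model_max_tokens: int, avg_chunk_tokens: int,
                                  api_batch_size_max: int = 100,
                                  utilization: float = 0.8) -> int:
        # Hypothetical sketch of the rule above, not the shipped code.
        usable_tokens = int(model_max_tokens * utilization)  # 80% leaves headroom
        fit = usable_tokens // max(1, avg_chunk_tokens)      # chunks that fit per batch
        return max(1, min(fit, api_batch_size_max))          # clamp to [1, api_batch_size_max]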
@@ -147,8 +185,12 @@ def _cleanup_fastembed_resources() -> None:
     try:
         from codexlens.semantic.embedder import clear_embedder_cache
         clear_embedder_cache()
-    except Exception:
+    except (ImportError, AttributeError):
+        # Expected when semantic module unavailable or cache function doesn't exist
         pass
+    except Exception as exc:
+        # Log unexpected errors but don't fail cleanup
+        logger.debug(f"Unexpected error during fastembed cleanup: {exc}")
 
 
 def _generate_chunks_from_cursor(
@@ -201,9 +243,18 @@ def _generate_chunks_from_cursor(
             total_files += 1
             for chunk in chunks:
                 yield (chunk, file_path)
+        except (OSError, UnicodeDecodeError) as e:
+            # File access or encoding errors
+            logger.error(f"Failed to read file {file_path}: {e}")
+            failed_files.append((file_path, f"File read error: {e}"))
+        except ValueError as e:
+            # Chunking configuration errors
+            logger.error(f"Chunking config error for {file_path}: {e}")
+            failed_files.append((file_path, f"Chunking error: {e}"))
         except Exception as e:
-            logger.error(f"Failed to chunk {file_path}: {e}")
-            failed_files.append((file_path, str(e)))
+            # Other unexpected errors
+            logger.error(f"Unexpected error processing {file_path}: {e}")
+            failed_files.append((file_path, f"Unexpected error: {e}"))
 
 
 def _create_token_aware_batches(
@@ -371,8 +422,153 @@ def _get_embedding_defaults() -> tuple[str, str, bool, List, str, float]:
             config.embedding_strategy,
             config.embedding_cooldown,
         )
-    except Exception:
+    except (ImportError, AttributeError, OSError, ValueError) as exc:
+        # Config not available or malformed - use defaults
+        logger.debug(f"Using default embedding config (config load failed): {exc}")
         return "fastembed", "code", True, [], "latency_aware", 60.0
+    except Exception as exc:
+        # Unexpected error - still use defaults but log
+        logger.warning(f"Unexpected error loading embedding config: {exc}")
+        return "fastembed", "code", True, [], "latency_aware", 60.0
+
+
+def _apply_embedding_config_defaults(
+    embedding_backend: Optional[str],
+    model_profile: Optional[str],
+    use_gpu: Optional[bool],
+    endpoints: Optional[List],
+    strategy: Optional[str],
+    cooldown: Optional[float],
+) -> tuple[str, str, bool, List, str, float]:
+    """Apply config defaults to embedding parameters.
+
+    This helper function reduces code duplication across embedding generation
+    functions by centralizing the default value application logic.
+
+    Args:
+        embedding_backend: Embedding backend (fastembed/litellm) or None for default
+        model_profile: Model profile/name or None for default
+        use_gpu: GPU flag or None for default
+        endpoints: API endpoints list or None for default
+        strategy: Selection strategy or None for default
+        cooldown: Cooldown seconds or None for default
+
+    Returns:
+        Tuple of (backend, model, use_gpu, endpoints, strategy, cooldown) with
+        defaults applied where None was passed.
+    """
+    (default_backend, default_model, default_gpu,
+     default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
+
+    backend = embedding_backend if embedding_backend is not None else default_backend
+    model = model_profile if model_profile is not None else default_model
+    gpu = use_gpu if use_gpu is not None else default_gpu
+    eps = endpoints if endpoints is not None else default_endpoints
+    strat = strategy if strategy is not None else default_strategy
+    cool = cooldown if cooldown is not None else default_cooldown
+
+    return backend, model, gpu, eps, strat, cool
+
+
+def _calculate_max_workers(
+    embedding_backend: str,
+    endpoints: Optional[List],
+    max_workers: Optional[int],
+) -> int:
+    """Calculate optimal max_workers based on backend and endpoint count.
+
+    Args:
+        embedding_backend: The embedding backend being used
+        endpoints: List of API endpoints (for litellm multi-endpoint mode)
+        max_workers: Explicitly specified max_workers or None for auto-calculation
+
+    Returns:
+        Calculated or specified max_workers value
+    """
+    if max_workers is not None:
+        return max_workers
+
+    endpoint_count = len(endpoints) if endpoints else 1
+
+    # Set dynamic max_workers default based on backend type and endpoint count
+    # - FastEmbed: CPU-bound, sequential is optimal (1 worker)
+    # - LiteLLM single endpoint: 4 workers default
+    # - LiteLLM multi-endpoint: workers = endpoint_count * 2 (to saturate all APIs)
+    if embedding_backend == "litellm":
+        if endpoint_count > 1:
+            return endpoint_count * 2  # No cap, scale with endpoints
+        else:
+            return 4
+    else:
+        return 1
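
The scaling rules above reduce to a handful of cases; a few illustrative calls (endpoint lists are placeholders):

    # Illustrative outcomes of _calculate_max_workers; endpoint names are placeholders.
    assert _calculate_max_workers("fastembed", None, None) == 1                 # CPU-bound: sequential
    assert _calculate_max_workers("litellm", None, None) == 4                   # single endpoint default
    assert _calculate_max_workers("litellm", ["ep1", "ep2", "ep3"], None) == 6  # 3 endpoints * 2
    assert _calculate_max_workers("litellm", ["ep1"], 8) == 8                   # explicit value wins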
+
+
+def _initialize_embedder_and_chunker(
+    embedding_backend: str,
+    model_profile: str,
+    use_gpu: bool,
+    endpoints: Optional[List],
+    strategy: str,
+    cooldown: float,
+    chunk_size: int,
+    overlap: int,
+) -> tuple:
+    """Initialize embedder and chunker for embedding generation.
+
+    This helper function reduces code duplication by centralizing embedder
+    and chunker initialization logic.
+
+    Args:
+        embedding_backend: The embedding backend (fastembed/litellm)
+        model_profile: Model profile or name
+        use_gpu: Whether to use GPU acceleration
+        endpoints: Optional API endpoints for load balancing
+        strategy: Selection strategy for multi-endpoint mode
+        cooldown: Cooldown seconds for rate-limited endpoints
+        chunk_size: Maximum chunk size in characters
+        overlap: Overlap size in characters
+
+    Returns:
+        Tuple of (embedder, chunker, endpoint_count)
+
+    Raises:
+        ValueError: If embedding_backend is invalid
+    """
+    from codexlens.semantic.factory import get_embedder as get_embedder_factory
+    from codexlens.semantic.chunker import Chunker, ChunkConfig
+    from codexlens.config import Config
+
+    # Initialize embedder using factory (supports fastembed, litellm, and rotational)
+    # For fastembed: model_profile is a profile name (fast/code/multilingual/balanced)
+    # For litellm: model_profile is a model name (e.g., qwen3-embedding)
+    # For multi-endpoint: endpoints list enables load balancing
+    if embedding_backend == "fastembed":
+        embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=use_gpu)
+    elif embedding_backend == "litellm":
+        embedder = get_embedder_factory(
+            backend="litellm",
+            model=model_profile,
+            endpoints=endpoints if endpoints else None,
+            strategy=strategy,
+            cooldown=cooldown,
+        )
+    else:
+        raise ValueError(f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.")
+
+    # skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
+    # This significantly reduces CPU usage with minimal impact on metadata accuracy
+    # Load chunk stripping config from settings
+    chunk_cfg = Config.load()
+    chunker = Chunker(config=ChunkConfig(
+        max_chunk_size=chunk_size,
+        overlap=overlap,
+        skip_token_count=True,
+        strip_comments=getattr(chunk_cfg, 'chunk_strip_comments', True),
+        strip_docstrings=getattr(chunk_cfg, 'chunk_strip_docstrings', True),
+    ))
+
+    endpoint_count = len(endpoints) if endpoints else 1
+    return embedder, chunker, endpoint_count
 
 
 def generate_embeddings(
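
Taken together, the three helpers collapse the duplicated setup blocks that the hunks below remove. A minimal sketch of the call sequence, with illustrative chunking values:

    # Sketch of the refactored setup path; chunk_size/overlap values are illustrative.
    backend, model, gpu, endpoints, strategy, cooldown = _apply_embedding_config_defaults(
        None, None, None, None, None, None)  # all None -> fall back to config defaults
    workers = _calculate_max_workers(backend, endpoints, None)
    embedder, chunker, endpoint_count = _initialize_embedder_and_chunker(
        backend, model, gpu, endpoints, strategy, cooldown, chunk_size=1000, overlap=200)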
@@ -397,16 +593,16 @@ def generate_embeddings(
         LiteLLM backend to improve throughput.
 
     Args:
-        index_path: Path to _index.db file
+        index_path: Path to _index.db file.
         embedding_backend: Embedding backend to use (fastembed or litellm).
             Defaults to config setting.
         model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
             or model name for litellm (e.g., qwen3-embedding).
             Defaults to config setting.
-        force: If True, regenerate even if embeddings exist
-        chunk_size: Maximum chunk size in characters
-        overlap: Overlap size in characters for sliding window chunking (default: 200)
-        progress_callback: Optional callback for progress updates
+        force: If True, regenerate even if embeddings exist.
+        chunk_size: Maximum chunk size in characters.
+        overlap: Overlap size in characters for sliding window chunking (default: 200).
+        progress_callback: Optional callback for progress updates.
         use_gpu: Whether to use GPU acceleration (fastembed only).
             Defaults to config setting.
         max_tokens_per_batch: Maximum tokens per batch for token-aware batching.
@@ -420,40 +616,22 @@ def generate_embeddings(
         cooldown: Default cooldown seconds for rate-limited endpoints.
 
     Returns:
-        Result dictionary with generation statistics
+        Dict[str, any]: Result dictionary with generation statistics.
+            Contains keys: success, error (if failed), files_processed,
+            total_chunks_created, execution_time, etc.
 
     Raises:
         ValueError: If embedding_backend is invalid.
         ImportError: If semantic module is not available.
     """
-    # Get defaults from config if not specified
-    (default_backend, default_model, default_gpu,
-     default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
-    if embedding_backend is None:
-        embedding_backend = default_backend
-    if model_profile is None:
-        model_profile = default_model
-    if use_gpu is None:
-        use_gpu = default_gpu
-    if endpoints is None:
-        endpoints = default_endpoints
-    if strategy is None:
-        strategy = default_strategy
-    if cooldown is None:
-        cooldown = default_cooldown
-
-    # Calculate endpoint count for worker scaling
-    endpoint_count = len(endpoints) if endpoints else 1
-
-    # Set dynamic max_workers default based on backend type and endpoint count
-    # - FastEmbed: CPU-bound, sequential is optimal (1 worker)
-    # - LiteLLM single endpoint: 4 workers default
-    # - LiteLLM multi-endpoint: workers = endpoint_count * 2 (to saturate all APIs)
-    if max_workers is None:
-        if embedding_backend == "litellm":
-            if endpoint_count > 1:
-                max_workers = endpoint_count * 2  # No cap, scale with endpoints
-            else:
-                max_workers = 4
-        else:
-            max_workers = 1
+    # Apply config defaults
+    embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown = \
+        _apply_embedding_config_defaults(
+            embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown
+        )
+
+    # Calculate max_workers
+    max_workers = _calculate_max_workers(embedding_backend, endpoints, max_workers)
 
     backend_available, backend_error = is_embedding_backend_available(embedding_backend)
     if not backend_available:
@@ -487,51 +665,23 @@ def generate_embeddings(
             with sqlite3.connect(index_path) as conn:
                 conn.execute("DELETE FROM semantic_chunks")
                 conn.commit()
+        except sqlite3.DatabaseError as e:
+            return {
+                "success": False,
+                "error": f"Database error clearing chunks: {str(e)}",
+            }
         except Exception as e:
             return {
                 "success": False,
                 "error": f"Failed to clear existing chunks: {str(e)}",
             }
 
-    # Initialize components
+    # Initialize embedder and chunker using helper
     try:
-        # Import factory function to support both backends
-        from codexlens.semantic.factory import get_embedder as get_embedder_factory
         from codexlens.semantic.vector_store import VectorStore
-        from codexlens.semantic.chunker import Chunker, ChunkConfig
-
-        # Initialize embedder using factory (supports fastembed, litellm, and rotational)
-        # For fastembed: model_profile is a profile name (fast/code/multilingual/balanced)
-        # For litellm: model_profile is a model name (e.g., qwen3-embedding)
-        # For multi-endpoint: endpoints list enables load balancing
-        if embedding_backend == "fastembed":
-            embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=use_gpu)
-        elif embedding_backend == "litellm":
-            embedder = get_embedder_factory(
-                backend="litellm",
-                model=model_profile,
-                endpoints=endpoints if endpoints else None,
-                strategy=strategy,
-                cooldown=cooldown,
-            )
-        else:
-            return {
-                "success": False,
-                "error": f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.",
-            }
-
-        # skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
-        # This significantly reduces CPU usage with minimal impact on metadata accuracy
-        # Load chunk stripping config from settings
-        from codexlens.config import Config
-        chunk_cfg = Config.load()
-        chunker = Chunker(config=ChunkConfig(
-            max_chunk_size=chunk_size,
-            overlap=overlap,
-            skip_token_count=True,
-            strip_comments=getattr(chunk_cfg, 'chunk_strip_comments', True),
-            strip_docstrings=getattr(chunk_cfg, 'chunk_strip_docstrings', True),
-        ))
+        embedder, chunker, endpoint_count = _initialize_embedder_and_chunker(
+            embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown,
+            chunk_size, overlap
+        )
 
         # Log embedder info with endpoint count for multi-endpoint mode
         if progress_callback:
@@ -547,10 +697,17 @@ def generate_embeddings(
         if progress_callback and batch_config.api_batch_size_dynamic:
             progress_callback(f"Dynamic batch size: {effective_batch_size} (model max_tokens={getattr(embedder, 'max_tokens', 8192)})")
 
-    except Exception as e:
+    except (ImportError, ValueError) as e:
+        # Missing dependency or invalid configuration
         return {
             "success": False,
-            "error": f"Failed to initialize components: {str(e)}",
+            "error": f"Failed to initialize embedding components: {str(e)}",
         }
+    except Exception as e:
+        # Other unexpected errors
+        return {
+            "success": False,
+            "error": f"Unexpected error initializing components: {str(e)}",
+        }
 
     # --- STREAMING PROCESSING ---
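
Initialization failures now come back as structured results instead of escaping as exceptions, so callers branch on the returned dict. A small sketch using the keys documented above (the path is a placeholder):

    from pathlib import Path

    result = generate_embeddings(index_path=Path("path/to/_index.db"))
    if not result["success"]:
        logger.error("Embedding generation failed: %s", result["error"])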
@@ -814,8 +971,8 @@ def generate_embeddings(
         try:
             _cleanup_fastembed_resources()
             gc.collect()
-        except Exception:
-            pass
+        except Exception as cleanup_exc:
+            logger.debug(f"Cleanup error during exception handling: {cleanup_exc}")
 
         return {"success": False, "error": f"Failed to read or process files: {str(e)}"}
 
     elapsed_time = time.time() - start_time
@@ -825,8 +982,8 @@ def generate_embeddings(
     try:
         _cleanup_fastembed_resources()
         gc.collect()
-    except Exception:
-        pass
+    except Exception as cleanup_exc:
+        logger.debug(f"Cleanup error during finalization: {cleanup_exc}")
 
     return {
         "success": True,
@@ -922,7 +1079,8 @@ def build_centralized_binary_vectors_from_existing(
                 }
 
             # We count per-dim later after selecting a target dim.
-        except Exception:
+        except (sqlite3.DatabaseError, ValueError, TypeError):
+            # Skip corrupted or malformed indexes
             continue
 
     if not dims_seen:
@@ -971,7 +1129,8 @@ def build_centralized_binary_vectors_from_existing(
"SELECT COUNT(*) FROM semantic_chunks WHERE embedding IS NOT NULL AND length(embedding) > 0"
).fetchone()
total_chunks += int(row[0] if row else 0)
except Exception:
except (sqlite3.DatabaseError, ValueError, TypeError):
# Skip corrupted or malformed indexes
continue
if not total_chunks:
@@ -987,7 +1146,7 @@ def build_centralized_binary_vectors_from_existing(
     # Prepare output files / DB.
     try:
         import numpy as np
-    except Exception as exc:
+    except ImportError as exc:
         return {"success": False, "error": f"numpy required to build binary vectors: {exc}"}
 
     store = VectorMetadataStore(vectors_meta_path)
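
For background on the numpy requirement: binary vectors are typically derived by sign-thresholding each dense embedding and bit-packing the result, so one byte covers eight dimensions. A minimal sketch of that scheme, as an assumption about the representation rather than code from this commit:

    import numpy as np

    def to_binary_vector(dense: np.ndarray) -> np.ndarray:
        # Assumed scheme: 1 bit per dimension, thresholded at zero.
        bits = (dense > 0).astype(np.uint8)
        return np.packbits(bits)  # packs 8 dimensions per byte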
@@ -1243,35 +1402,14 @@ def generate_embeddings_recursive(
         stacklevel=2
     )
 
-    # Get defaults from config if not specified
-    (default_backend, default_model, default_gpu,
-     default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
-    if embedding_backend is None:
-        embedding_backend = default_backend
-    if model_profile is None:
-        model_profile = default_model
-    if use_gpu is None:
-        use_gpu = default_gpu
-    if endpoints is None:
-        endpoints = default_endpoints
-    if strategy is None:
-        strategy = default_strategy
-    if cooldown is None:
-        cooldown = default_cooldown
-
-    # Calculate endpoint count for worker scaling
-    endpoint_count = len(endpoints) if endpoints else 1
-
-    # Set dynamic max_workers default based on backend type and endpoint count
-    if max_workers is None:
-        if embedding_backend == "litellm":
-            if endpoint_count > 1:
-                max_workers = endpoint_count * 2  # No cap, scale with endpoints
-            else:
-                max_workers = 4
-        else:
-            max_workers = 1
+    # Apply config defaults
+    embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown = \
+        _apply_embedding_config_defaults(
+            embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown
+        )
+
+    # Calculate max_workers
+    max_workers = _calculate_max_workers(embedding_backend, endpoints, max_workers)
 
     # Discover all _index.db files (using internal helper to avoid double deprecation warning)
     index_files = _discover_index_dbs_internal(index_root)
@@ -1401,34 +1539,14 @@ def generate_dense_embeddings_centralized(
"""
from codexlens.config import VECTORS_HNSW_NAME
# Get defaults from config if not specified
(default_backend, default_model, default_gpu,
default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
# Apply config defaults
embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown = \
_apply_embedding_config_defaults(
embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown
)
if embedding_backend is None:
embedding_backend = default_backend
if model_profile is None:
model_profile = default_model
if use_gpu is None:
use_gpu = default_gpu
if endpoints is None:
endpoints = default_endpoints
if strategy is None:
strategy = default_strategy
if cooldown is None:
cooldown = default_cooldown
# Calculate endpoint count for worker scaling
endpoint_count = len(endpoints) if endpoints else 1
if max_workers is None:
if embedding_backend == "litellm":
if endpoint_count > 1:
max_workers = endpoint_count * 2
else:
max_workers = 4
else:
max_workers = 1
# Calculate max_workers
max_workers = _calculate_max_workers(embedding_backend, endpoints, max_workers)
backend_available, backend_error = is_embedding_backend_available(embedding_backend)
if not backend_available:
@@ -1470,38 +1588,18 @@ def generate_dense_embeddings_centralized(
"error": f"Centralized vector index already exists at {central_hnsw_path}. Use --force to regenerate.",
}
# Initialize embedder
# Initialize embedder and chunker using helper
try:
from codexlens.semantic.factory import get_embedder as get_embedder_factory
from codexlens.semantic.chunker import Chunker, ChunkConfig
from codexlens.semantic.ann_index import ANNIndex
if embedding_backend == "fastembed":
embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=use_gpu)
elif embedding_backend == "litellm":
embedder = get_embedder_factory(
backend="litellm",
model=model_profile,
endpoints=endpoints if endpoints else None,
strategy=strategy,
cooldown=cooldown,
)
else:
return {
"success": False,
"error": f"Invalid embedding backend: {embedding_backend}",
}
embedder, chunker, endpoint_count = _initialize_embedder_and_chunker(
embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown,
chunk_size, overlap
)
# Load chunk stripping config from settings
# Load chunk stripping config for batch size calculation
from codexlens.config import Config
chunk_cfg = Config.load()
chunker = Chunker(config=ChunkConfig(
max_chunk_size=chunk_size,
overlap=overlap,
skip_token_count=True,
strip_comments=getattr(chunk_cfg, 'chunk_strip_comments', True),
strip_docstrings=getattr(chunk_cfg, 'chunk_strip_docstrings', True),
))
batch_config = Config.load()
if progress_callback:
if endpoint_count > 1:
@@ -1509,7 +1607,6 @@ def generate_dense_embeddings_centralized(
progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")
# Calculate dynamic batch size based on model capacity
batch_config = chunk_cfg # Reuse already loaded config
effective_batch_size = calculate_dynamic_batch_size(batch_config, embedder)
if progress_callback and batch_config.api_batch_size_dynamic:


@@ -120,8 +120,12 @@ def load_env_file(env_path: Path) -> Dict[str, str]:
             if result:
                 key, value = result
                 env_vars[key] = value
-    except Exception as exc:
+    except (OSError, UnicodeDecodeError) as exc:
+        # File access errors or encoding issues are expected and logged
         log.warning("Failed to load .env file %s: %s", env_path, exc)
+    except Exception as exc:
+        # Other unexpected errors are also logged but indicate a code issue
+        log.warning("Unexpected error loading .env file %s: %s", env_path, exc)
 
     return env_vars
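
With both handlers in place the loader degrades gracefully: failures are logged and an (empty or partial) mapping is still returned. A usage sketch, with a placeholder file name and key:

    from pathlib import Path

    env = load_env_file(Path(".env"))
    api_key = env.get("API_KEY", "")  # placeholder key; missing keys fall back to the default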