mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
203 lines
6.9 KiB
Python
203 lines
6.9 KiB
Python
"""Factory for creating clustering strategies.
|
|
|
|
Provides a unified interface for instantiating different clustering backends
|
|
with automatic fallback chain: hdbscan -> dbscan -> noop.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Optional
|
|
|
|
from .base import BaseClusteringStrategy, ClusteringConfig
|
|
from .noop_strategy import NoOpStrategy
|
|
|
|
|
|
def check_clustering_strategy_available(strategy: str) -> tuple[bool, str | None]:
    """Report whether the named clustering strategy is usable right now.

    The name is compared case-insensitively after stripping surrounding
    whitespace; a falsy value is treated as an empty name.

    Args:
        strategy: Name of the strategy to probe. Recognized names:
            - "hdbscan": usable when the ``hdbscan`` package imports
            - "dbscan": usable when ``sklearn.cluster.DBSCAN`` imports
            - "frequency": no optional dependency, always usable
            - "noop": always usable

    Returns:
        A ``(is_available, error_message)`` pair. ``error_message`` is
        ``None`` when the strategy is usable; otherwise it holds install
        instructions or, for an unrecognized name, the list of valid names.
    """
    strategy = (strategy or "").strip().lower()

    # Stays None unless a probe fails or the name is not recognized.
    problem: str | None = None

    if strategy == "hdbscan":
        try:
            import hdbscan  # noqa: F401
        except ImportError:
            problem = (
                "hdbscan package not available. "
                "Install with: pip install codexlens[clustering]"
            )
    elif strategy == "dbscan":
        try:
            from sklearn.cluster import DBSCAN  # noqa: F401
        except ImportError:
            problem = (
                "scikit-learn package not available. "
                "Install with: pip install codexlens[clustering]"
            )
    elif strategy not in ("frequency", "noop"):
        # "frequency" and "noop" have no external deps, so only names
        # outside the known set reach this branch.
        problem = (
            f"Invalid clustering strategy: {strategy}. "
            "Must be 'hdbscan', 'dbscan', 'frequency', or 'noop'."
        )

    return problem is None, problem
|
|
|
|
|
|
def get_strategy(
    strategy: str = "hdbscan",
    config: Optional[ClusteringConfig] = None,
    *,
    fallback: bool = True,
    **kwargs: Any,
) -> BaseClusteringStrategy:
    """Factory function to create a clustering strategy with a fallback chain.

    When ``fallback`` is True, the chains are:
        - "hdbscan": hdbscan -> dbscan -> noop
        - "dbscan": dbscan -> noop

    The frequency strategy is never chosen as a fallback; it is only
    returned when requested by name.

    Args:
        strategy: Clustering strategy to use (case-insensitive, surrounding
            whitespace ignored). Options:
            - "hdbscan": HDBSCAN clustering (default, recommended)
            - "dbscan": DBSCAN clustering (fallback)
            - "frequency": Frequency-based clustering (groups by symbol occurrence)
            - "noop": No-op strategy (returns all results ungrouped)
            - "auto": Try hdbscan, then dbscan, then noop
        config: Clustering configuration handed to the created strategy.
            For "frequency", only a FrequencyConfig instance is used as-is;
            any other value (including None or a plain ClusteringConfig) is
            replaced by ``FrequencyConfig(**kwargs)``.
        fallback: If True (default), automatically fall back to the next
            strategy in the chain when "hdbscan" or "dbscan" is unavailable.
            If False, raise ImportError instead. Not consulted for "auto",
            "frequency" or "noop".
        **kwargs: Additional strategy-specific arguments.
            For DBSCANStrategy: eps, eps_percentile
            For FrequencyStrategy: group_by, min_frequency, etc. (used only
            when ``config`` is not already a FrequencyConfig)
            Not forwarded to HDBSCANStrategy or NoOpStrategy.

    Returns:
        BaseClusteringStrategy: Configured clustering strategy instance.

    Raises:
        ValueError: If strategy is not recognized.
        ImportError: If "hdbscan" or "dbscan" is requested, its dependency
            is not installed, and fallback=False.

    Example:
        >>> from codexlens.search.clustering import get_strategy, ClusteringConfig
        >>> config = ClusteringConfig(min_cluster_size=3)
        >>> # Auto-select best available strategy
        >>> strategy = get_strategy("auto", config)
        >>> # Explicitly use HDBSCAN (will fall back if unavailable)
        >>> strategy = get_strategy("hdbscan", config)
        >>> # Use frequency-based strategy
        >>> from codexlens.search.clustering import FrequencyConfig
        >>> freq_config = FrequencyConfig(min_frequency=2, group_by="symbol")
        >>> strategy = get_strategy("frequency", freq_config)
    """
    strategy = (strategy or "").strip().lower()

    # "auto" delegates the whole selection to the availability probe chain.
    if strategy == "auto":
        return _get_best_available_strategy(config, **kwargs)

    if strategy == "hdbscan":
        ok, err = check_clustering_strategy_available("hdbscan")
        if ok:
            # Imported lazily so the optional dependency is only loaded
            # once the availability check has passed.
            from .hdbscan_strategy import HDBSCANStrategy

            return HDBSCANStrategy(config)

        if not fallback:
            raise ImportError(err)

        # Fallback chain: dbscan first, then the always-available noop.
        ok_dbscan, _ = check_clustering_strategy_available("dbscan")
        if ok_dbscan:
            from .dbscan_strategy import DBSCANStrategy

            return DBSCANStrategy(config, **kwargs)
        return NoOpStrategy(config)

    if strategy == "dbscan":
        ok, err = check_clustering_strategy_available("dbscan")
        if ok:
            from .dbscan_strategy import DBSCANStrategy

            return DBSCANStrategy(config, **kwargs)

        if not fallback:
            raise ImportError(err)

        # Only noop remains after dbscan in the chain.
        return NoOpStrategy(config)

    if strategy == "frequency":
        from .frequency_strategy import FrequencyStrategy, FrequencyConfig

        # A supplied FrequencyConfig wins and kwargs are ignored; anything
        # else (None or a non-frequency config) is rebuilt from kwargs.
        # isinstance(None, ...) is False, so no separate None check is needed.
        if isinstance(config, FrequencyConfig):
            freq_config = config
        else:
            freq_config = FrequencyConfig(**kwargs)
        return FrequencyStrategy(freq_config)

    if strategy == "noop":
        return NoOpStrategy(config)

    raise ValueError(
        f"Unknown clustering strategy: {strategy}. "
        "Supported strategies: 'hdbscan', 'dbscan', 'frequency', 'noop', 'auto'"
    )
|
|
|
|
|
|
def _get_best_available_strategy(
    config: Optional[ClusteringConfig] = None,
    **kwargs: Any,
) -> BaseClusteringStrategy:
    """Pick the first usable strategy in the order hdbscan -> dbscan -> noop.

    Args:
        config: Clustering configuration handed to whichever strategy is built.
        **kwargs: Additional strategy-specific arguments; forwarded only
            when a DBSCANStrategy is created.

    Returns:
        An HDBSCANStrategy when hdbscan is available, otherwise a
        DBSCANStrategy when dbscan is available, otherwise a NoOpStrategy.
    """
    # First choice: HDBSCAN.
    hdbscan_ready = check_clustering_strategy_available("hdbscan")[0]
    if hdbscan_ready:
        from .hdbscan_strategy import HDBSCANStrategy

        return HDBSCANStrategy(config)

    # Neither clustering backend usable: hand back the no-op strategy.
    dbscan_ready = check_clustering_strategy_available("dbscan")[0]
    if not dbscan_ready:
        return NoOpStrategy(config)

    # Second choice: DBSCAN, the only branch that receives kwargs.
    from .dbscan_strategy import DBSCANStrategy

    return DBSCANStrategy(config, **kwargs)
|
|
|
|
|
|
# Alias for backward compatibility
class ClusteringStrategyFactory:
    """Namespace exposing the module-level factory functions as static methods."""

    # The right-hand names resolve to the module-level functions because the
    # class namespace does not contain them yet when each line is evaluated.
    get_strategy = staticmethod(get_strategy)
    check_available = staticmethod(check_clustering_strategy_available)
|