# Claude-Code-Workflow/.claude/python_script/config.yaml

# Configuration for UltraThink Path-Aware Analyzer
# Based on gemini-wrapper patterns with intelligent enhancements

# Token-count thresholds for project size classification.
# The inline notes give the token band each tier covers and the file-selection
# approach associated with it.
# NOTE(review): large_project is set to 10M while its band is described as
# ">2M" - presumably an upper figure for the large tier; confirm in the consumer.
token_limits:
  small_project: 500000      # <500K tokens - include most files
  medium_project: 2000000    # 500K-2M tokens - smart selection
  large_project: 10000000    # >2M tokens - precise targeting
  max_files: 1000            # Maximum number of files to process

# Wildcard patterns for files that are left out (for performance and
# relevance). Every entry stays quoted: a plain scalar may not start with "*".
exclude_patterns:
  # Directory patterns
  - '*/node_modules/*'
  - '*/.git/*'
  - '*/build/*'
  - '*/dist/*'
  - '*/.next/*'
  - '*/.nuxt/*'
  - '*/target/*'
  - '*/vendor/*'
  - '*/__pycache__/*'
  # File-suffix patterns
  - '*.pyc'
  - '*.pyo'
  - '*.log'
  - '*.tmp'
  - '*.temp'
  - '*.history'

# File extensions, grouped into four categories (code, docs, config, web).
# Each extension is written with its leading dot.
file_extensions:
  # Source code and shell scripts
  code:
    - '.py'
    - '.js'
    - '.ts'
    - '.tsx'
    - '.jsx'
    - '.java'
    - '.cpp'
    - '.c'
    - '.h'
    - '.rs'
    - '.go'
    - '.php'
    - '.rb'
    - '.sh'
    - '.bash'
  # Documentation and plain text
  docs:
    - '.md'
    - '.txt'
    - '.rst'
    - '.adoc'
  # Configuration files
  config:
    - '.json'
    - '.yaml'
    - '.yml'
    - '.toml'
    - '.ini'
    - '.env'
  # Web markup and stylesheets
  web:
    - '.html'
    - '.css'
    - '.scss'
    - '.sass'
    - '.xml'

# Embedding / RAG settings
embedding:
  # Set to true to enable RAG features
  enabled: true
  # CodeSage V2, used for code embeddings
  model: 'codesage/codesage-large-v2'
  cache_dir: 'cache'
  # Higher threshold for better code similarity
  similarity_threshold: 0.6
  # Increased for CodeSage V2 capabilities
  max_context_length: 2048
  # Reduced for the larger model
  batch_size: 8
  # Required for CodeSage V2.
  # NOTE(review): presumably forwarded to the model loader, which would then
  # run code shipped with the model - confirm the model source is trusted.
  trust_remote_code: true

# Context analysis settings
context_analysis:
  # Keywords that indicate specific domains/modules.
  # Note that 'service' is listed under both api and backend.
  domain_keywords:
    auth:
      - 'auth'
      - 'login'
      - 'user'
      - 'password'
      - 'jwt'
      - 'token'
      - 'session'
    database:
      - 'db'
      - 'database'
      - 'sql'
      - 'query'
      - 'model'
      - 'schema'
      - 'migration'
    api:
      - 'api'
      - 'endpoint'
      - 'route'
      - 'controller'
      - 'service'
      - 'handler'
    frontend:
      - 'ui'
      - 'component'
      - 'view'
      - 'template'
      - 'style'
      - 'css'
    backend:
      - 'server'
      - 'service'
      - 'logic'
      - 'business'
      - 'core'
    test:
      - 'test'
      - 'spec'
      - 'unit'
      - 'integration'
      - 'mock'
    config:
      - 'config'
      - 'setting'
      - 'environment'
      - 'env'
    util:
      - 'util'
      - 'helper'
      - 'common'
      - 'shared'
      - 'lib'

  # Programming language indicators: each list mixes file extensions,
  # tool names and well-known file names.
  language_indicators:
    python:
      - '.py'
      - 'python'
      - 'pip'
      - 'requirements.txt'
      - 'setup.py'
    javascript:
      - '.js'
      - '.ts'
      - 'npm'
      - 'package.json'
      - 'node'
    java:
      - '.java'
      - 'maven'
      - 'gradle'
      - 'pom.xml'
    go:
      - '.go'
      - 'go.mod'
      - 'go.sum'
    rust:
      - '.rs'
      - 'cargo'
      - 'Cargo.toml'

# Path matching and ranking: how candidate files are scored and filtered.
path_matching:
  # Scoring weights for relevance calculation.
  # The five weights below add up to 1.0.
  weights:
    keyword_match: 0.4        # Direct keyword match in filename/path
    extension_match: 0.2      # File extension relevance
    directory_context: 0.2    # Directory name relevance
    file_size_penalty: 0.1    # Penalty for very large files
    recency_bonus: 0.1        # Bonus for recently modified files

  # Maximum number of files to return per category
  max_files_per_category: 20

  # Minimum relevance score a file needs in order to be included
  min_relevance_score: 0.1

# Output formatting
output:
  # Template for emitted path patterns; results in @{path/to/file}.
  # NOTE(review): the doubled braces look like str.format-style escaping
  # around a {path} placeholder - confirm against the consumer.
  pattern_format: '@{{{path}}}'

  # Project documentation that is included by default
  always_include:
    - 'CLAUDE.md'
    - '**/CLAUDE.md'
    - 'README.md'
    - 'docs/**/*.md'

  # Upper limit on the total number of files in the output
  max_total_files: 50

# Analysis modes. Each mode has a human-readable description and an
# enabled flag; all four are currently enabled.
modes:
  auto:
    description: 'Fully automatic path detection'
    enabled: true
  guided:
    description: 'Suggest paths for user confirmation'
    enabled: true
  pattern:
    description: 'Use explicit patterns from user'
    enabled: true
  hybrid:
    description: 'Combine auto-detection with user patterns'
    enabled: true

# Performance settings: caching, file size cap and parallelism.
performance:
  # Cache settings
  cache_enabled: true
  cache_ttl: 3600  # Cache time-to-live in seconds (3600 s = 1 hour)

  # File size limits
  max_file_size: 10485760  # 10MB (10 * 1024 * 1024 bytes) max file size to analyze

  # Parallel processing
  max_workers: 4  # Number of parallel workers for file processing

# Logging configuration
logging:
  # One of: DEBUG, INFO, WARNING, ERROR
  level: 'INFO'
  # Log file location, given as a relative path.
  # NOTE(review): presumably resolved against the working directory - confirm
  # that the target directory exists where the tool is run.
  file: '.claude/scripts/ultrathink/ultrathink.log'
  # Record layout: timestamp - logger name - level - message.
  # Must stay quoted because the value begins with "%".
  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'