feat: Add unified LiteLLM API management with dashboard UI and CLI integration

- Create ccw-litellm Python package with AbstractEmbedder and AbstractLLMClient interfaces
- Add BaseEmbedder abstraction and factory pattern to codex-lens for pluggable backends
- Implement API Settings dashboard page for provider credentials and custom endpoints
- Add REST API routes for CRUD operations on providers and endpoints
- Extend CLI with --model parameter for custom endpoint routing
- Integrate existing context-cache for @pattern file resolution
- Add provider model registry with predefined models per provider type
- Include i18n translations (en/zh) for all new UI elements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
catlog22
2025-12-23 20:36:32 +08:00
parent 5228581324
commit bf66b095c7
44 changed files with 4948 additions and 19 deletions

View File

@@ -0,0 +1,14 @@
"""Abstract interfaces for ccw-litellm."""
from __future__ import annotations
from .embedder import AbstractEmbedder
from .llm import AbstractLLMClient, ChatMessage, LLMResponse
__all__ = [
"AbstractEmbedder",
"AbstractLLMClient",
"ChatMessage",
"LLMResponse",
]

View File

@@ -0,0 +1,52 @@
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from typing import Any, Sequence
import numpy as np
from numpy.typing import NDArray
class AbstractEmbedder(ABC):
    """Interface for fastembed-compatible text embedders.

    Concrete backends implement the synchronous `embed` method and the
    `dimensions` property; the asynchronous `aembed` coroutine is
    inherited for free and offloads the blocking call to a worker thread.
    """

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """Size of each embedding vector."""

    @abstractmethod
    def embed(
        self,
        texts: str | Sequence[str],
        *,
        batch_size: int | None = None,
        **kwargs: Any,
    ) -> NDArray[np.floating]:
        """Embed one or more texts.

        Args:
            texts: A single string or a sequence of strings.
            batch_size: Optional batching hint for the backend.
            **kwargs: Backend-specific options passed through unchanged.

        Returns:
            A numpy array of shape (n_texts, dimensions).
        """

    async def aembed(
        self,
        texts: str | Sequence[str],
        *,
        batch_size: int | None = None,
        **kwargs: Any,
    ) -> NDArray[np.floating]:
        """Run `embed` in a worker thread and await its result."""
        # Fold batch_size back into the keyword set so `embed` receives
        # exactly the arguments the caller supplied.
        kwargs["batch_size"] = batch_size
        return await asyncio.to_thread(self.embed, texts, **kwargs)

View File

@@ -0,0 +1,45 @@
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Literal, Sequence
@dataclass(frozen=True, slots=True)
class ChatMessage:
    """A single immutable message in a chat conversation."""

    # Speaker of the message, using OpenAI-style role names.
    role: Literal["system", "user", "assistant", "tool"]
    # Message text.
    content: str
@dataclass(frozen=True, slots=True)
class LLMResponse:
    """An immutable result of an LLM completion call."""

    # Generated text content.
    content: str
    # Backend-specific raw response object, when the client keeps it.
    raw: Any | None = None
class AbstractLLMClient(ABC):
    """LiteLLM-like client interface.

    Concrete clients supply the blocking `chat` and `complete` methods;
    the `achat`/`acomplete` coroutines defined here delegate those calls
    to a worker thread so the event loop is never blocked.
    """

    @abstractmethod
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> LLMResponse:
        """Run a chat completion over an ordered message sequence."""

    @abstractmethod
    def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
        """Run a plain text completion for a single prompt."""

    async def achat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> LLMResponse:
        """Await `chat` without blocking the running event loop."""
        sync_call = self.chat
        return await asyncio.to_thread(sync_call, messages, **kwargs)

    async def acomplete(self, prompt: str, **kwargs: Any) -> LLMResponse:
        """Await `complete` without blocking the running event loop."""
        sync_call = self.complete
        return await asyncio.to_thread(sync_call, prompt, **kwargs)