Refactor code structure and remove redundant changes

This commit is contained in:
catlog22
2026-01-24 14:47:47 +08:00
parent cf5fecd66d
commit f2b0a5bbc9
113 changed files with 43217 additions and 235 deletions

View File

@@ -0,0 +1,61 @@
"""Base class for embedders.
Defines the interface that all embedders must implement.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Iterable
import numpy as np
class BaseEmbedder(ABC):
    """Abstract interface shared by every embedder implementation.

    Concrete embedders subclass this and supply ``embedding_dim``,
    ``model_name`` and ``embed_to_numpy``. ``max_tokens`` ships with a
    default value and only needs overriding when that default does not fit.
    """

    @property
    @abstractmethod
    def embedding_dim(self) -> int:
        """Dimensionality of the vectors this embedder produces.

        Returns:
            int: Length of a single embedding vector.
        """

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Name of the model backing this embedder.

        Returns:
            str: Name or identifier of the underlying model.
        """

    @property
    def max_tokens(self) -> int:
        """Upper bound on the number of tokens embedded in one call.

        Returns:
            int: Maximum token count; 8192 unless a subclass overrides
                this property.
        """
        return 8192

    @abstractmethod
    def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
        """Turn one text or several texts into a numpy array of embeddings.

        Args:
            texts: A single string, or an iterable of strings, to embed.

        Returns:
            numpy.ndarray: Embeddings with shape (n_texts, embedding_dim).
        """