Refactor code structure and remove redundant changes

2026-02-13 02:41:50 +08:00 · 2026-01-24 14:47:47 +08:00
parent cf5fecd66d
commit f2b0a5bbc9
113 changed files with 43217 additions and 235 deletions
--- a/codex-lens/build/lib/codexlens/entities.py
+++ b/codex-lens/build/lib/codexlens/entities.py
@@ -0,0 +1,128 @@
+"""Pydantic entity models for CodexLens."""
+
+from __future__ import annotations
+
+import math
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class Symbol(BaseModel):
+    """A code symbol discovered in a file."""
+
+    # Identifier and category of the symbol; both must be non-empty strings.
+    name: str = Field(..., min_length=1)
+    kind: str = Field(..., min_length=1)
+    # Line span of the symbol; checked further by validate_range below.
+    range: Tuple[int, int] = Field(..., description="(start_line, end_line), 1-based inclusive")
+    # Optional location of the symbol; defaults to None when not supplied.
+    file: Optional[str] = Field(default=None, description="Full path to the file containing this symbol")
+
+    @field_validator("range")
+    @classmethod
+    def validate_range(cls, value: Tuple[int, int]) -> Tuple[int, int]:
+        """Check that ``value`` is an ordered pair of 1-based line numbers.
+
+        Returns:
+            The value, unchanged, when it passes every check.
+
+        Raises:
+            ValueError: If the value does not hold exactly two items, if
+                either line number is below 1, or if the end line comes
+                before the start line.
+        """
+        if len(value) != 2:
+            raise ValueError("range must be a (start_line, end_line) tuple")
+        first, last = value
+        # The smaller of the two bounds decides whether both are >= 1.
+        if min(first, last) < 1:
+            raise ValueError("range lines must be >= 1")
+        if first > last:
+            raise ValueError("end_line must be >= start_line")
+        return value
+
+
+class SemanticChunk(BaseModel):
+    """A semantically meaningful chunk of content, optionally embedded."""
+
+    # Text of the chunk; must be a non-empty string.
+    content: str = Field(..., min_length=1)
+    # Optional vector; when present it is checked by validate_embedding.
+    embedding: Optional[List[float]] = Field(default=None, description="Vector embedding for semantic search")
+    # Free-form extra data; each instance gets its own fresh dict.
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+    id: Optional[int] = Field(default=None, description="Database row ID")
+    file_path: Optional[str] = Field(default=None, description="Source file path")
+
+    @field_validator("embedding")
+    @classmethod
+    def validate_embedding(cls, value: Optional[List[float]]) -> Optional[List[float]]:
+        """Reject embeddings that are empty, non-finite, or (near-)zero.
+
+        ``None`` means "no embedding" and is passed through untouched.
+
+        Returns:
+            The value, unchanged, when it passes every check.
+
+        Raises:
+            ValueError: If the list is empty, contains a NaN or infinite
+                component, or has a Euclidean norm below ``1e-10``.
+        """
+        if value is None:
+            return value
+        if not value:
+            raise ValueError("embedding cannot be empty when provided")
+        # A NaN component makes the norm NaN, and ``nan < epsilon`` is False,
+        # so without this guard such a vector would slip past the zero-vector
+        # check below. Infinite components are rejected for the same reason:
+        # the vector has no usable magnitude.
+        if not all(math.isfinite(x) for x in value):
+            raise ValueError("embedding must contain only finite values")
+        norm = math.sqrt(sum(x * x for x in value))
+        # Tolerance below which the vector is treated as all zeros.
+        epsilon = 1e-10
+        if norm < epsilon:
+            raise ValueError("embedding cannot be a zero vector")
+        return value
+
+
+class IndexedFile(BaseModel):
+    """An indexed source file with symbols and optional semantic chunks."""
+
+    # Both strings are required, non-empty, and normalised by the validator
+    # below.
+    path: str = Field(..., min_length=1)
+    language: str = Field(..., min_length=1)
+    # Collections default to a fresh empty list per instance.
+    symbols: List[Symbol] = Field(default_factory=list)
+    chunks: List[SemanticChunk] = Field(default_factory=list)
+    # Forward reference: CodeRelationship is declared later in this module.
+    relationships: List["CodeRelationship"] = Field(default_factory=list)
+
+    @field_validator("path", "language")
+    @classmethod
+    def strip_and_validate_nonempty(cls, value: str) -> str:
+        """Trim surrounding whitespace and refuse values that end up blank.
+
+        Returns:
+            The stripped string.
+
+        Raises:
+            ValueError: If nothing is left after stripping.
+        """
+        trimmed = value.strip()
+        if trimmed:
+            return trimmed
+        raise ValueError("value cannot be blank")
+
+
+class RelationshipType(str, Enum):
+    """Types of code relationships."""
+    # Mixes in ``str``, so each member is also a string equal to its value.
+    # Note that member names and values differ (e.g. CALL -> "calls").
+    CALL = "calls"
+    INHERITS = "inherits"
+    IMPORTS = "imports"
+
+
+class CodeRelationship(BaseModel):
+    """A relationship between code symbols (e.g., function calls, inheritance)."""
+
+    # Endpoints of the relationship; both names are required and non-empty.
+    source_symbol: str = Field(
+        ...,
+        min_length=1,
+        description="Name of source symbol",
+    )
+    target_symbol: str = Field(
+        ...,
+        min_length=1,
+        description="Name of target symbol",
+    )
+    # Must be one of the RelationshipType members.
+    relationship_type: RelationshipType = Field(
+        ...,
+        description="Type of relationship (call, inherits, etc.)",
+    )
+    # Where the relationship was found; the source file is mandatory while
+    # the target file may be left as None.
+    source_file: str = Field(
+        ...,
+        min_length=1,
+        description="File path containing source symbol",
+    )
+    target_file: Optional[str] = Field(
+        default=None,
+        description="File path containing target (None if same file)",
+    )
+    # Constrained to values >= 1.
+    source_line: int = Field(
+        ...,
+        ge=1,
+        description="Line number where relationship occurs (1-based)",
+    )
+
+
+class AdditionalLocation(BaseModel):
+    """A pointer to another location where a similar result was found.
+
+    Used for grouping search results with similar scores and content,
+    where the primary result is stored in SearchResult and secondary
+    locations are stored in this model.
+    """
+
+    # Required: a non-empty path and a score constrained to >= 0.0.
+    path: str = Field(..., min_length=1)
+    score: float = Field(..., ge=0.0)
+
+    # Optional details; each defaults to None when not supplied.
+    start_line: Optional[int] = Field(
+        default=None,
+        description="Start line of the result (1-based)",
+    )
+    end_line: Optional[int] = Field(
+        default=None,
+        description="End line of the result (1-based)",
+    )
+    symbol_name: Optional[str] = Field(
+        default=None,
+        description="Name of matched symbol",
+    )
+
+
+class SearchResult(BaseModel):
+    """A unified search result for lexical or semantic search."""
+
+    # Required: a non-empty path and a score constrained to >= 0.0.
+    path: str = Field(..., min_length=1)
+    score: float = Field(..., ge=0.0)
+
+    # Optional payloads; every one of these defaults to None.
+    excerpt: Optional[str] = None
+    content: Optional[str] = Field(
+        default=None,
+        description="Full content of matched code block",
+    )
+    symbol: Optional[Symbol] = None
+    chunk: Optional[SemanticChunk] = None
+
+    # Free-form extra data; each instance gets its own fresh dict.
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+
+    # Position and identity of the matched code block, when known.
+    start_line: Optional[int] = Field(
+        default=None,
+        description="Start line of code block (1-based)",
+    )
+    end_line: Optional[int] = Field(
+        default=None,
+        description="End line of code block (1-based)",
+    )
+    symbol_name: Optional[str] = Field(
+        default=None,
+        description="Name of matched symbol/function/class",
+    )
+    symbol_kind: Optional[str] = Field(
+        default=None,
+        description="Kind of symbol (function/class/method)",
+    )
+
+    # Secondary hits grouped under this result; empty list by default.
+    additional_locations: List["AdditionalLocation"] = Field(
+        default_factory=list,
+        description="Other locations for grouped results with similar scores and content.",
+    )