"""Pydantic entity models for CodexLens.""" from __future__ import annotations import math from enum import Enum from typing import Any, Dict, List, Optional, Tuple from pydantic import BaseModel, Field, field_validator class Symbol(BaseModel): """A code symbol discovered in a file.""" name: str = Field(..., min_length=1) kind: str = Field(..., min_length=1) range: Tuple[int, int] = Field(..., description="(start_line, end_line), 1-based inclusive") file: Optional[str] = Field(default=None, description="Full path to the file containing this symbol") @field_validator("range") @classmethod def validate_range(cls, value: Tuple[int, int]) -> Tuple[int, int]: if len(value) != 2: raise ValueError("range must be a (start_line, end_line) tuple") start_line, end_line = value if start_line < 1 or end_line < 1: raise ValueError("range lines must be >= 1") if end_line < start_line: raise ValueError("end_line must be >= start_line") return value class SemanticChunk(BaseModel): """A semantically meaningful chunk of content, optionally embedded.""" content: str = Field(..., min_length=1) embedding: Optional[List[float]] = Field(default=None, description="Vector embedding for semantic search") metadata: Dict[str, Any] = Field(default_factory=dict) id: Optional[int] = Field(default=None, description="Database row ID") file_path: Optional[str] = Field(default=None, description="Source file path") @field_validator("embedding") @classmethod def validate_embedding(cls, value: Optional[List[float]]) -> Optional[List[float]]: if value is None: return value if not value: raise ValueError("embedding cannot be empty when provided") norm = math.sqrt(sum(x * x for x in value)) epsilon = 1e-10 if norm < epsilon: raise ValueError("embedding cannot be a zero vector") return value class IndexedFile(BaseModel): """An indexed source file with symbols and optional semantic chunks.""" path: str = Field(..., min_length=1) language: str = Field(..., min_length=1) symbols: List[Symbol] = Field(default_factory=list) chunks: List[SemanticChunk] = Field(default_factory=list) relationships: List["CodeRelationship"] = Field(default_factory=list) @field_validator("path", "language") @classmethod def strip_and_validate_nonempty(cls, value: str) -> str: cleaned = value.strip() if not cleaned: raise ValueError("value cannot be blank") return cleaned class RelationshipType(str, Enum): """Types of code relationships.""" CALL = "calls" INHERITS = "inherits" IMPORTS = "imports" class CodeRelationship(BaseModel): """A relationship between code symbols (e.g., function calls, inheritance).""" source_symbol: str = Field(..., min_length=1, description="Name of source symbol") target_symbol: str = Field(..., min_length=1, description="Name of target symbol") relationship_type: RelationshipType = Field(..., description="Type of relationship (call, inherits, etc.)") source_file: str = Field(..., min_length=1, description="File path containing source symbol") target_file: Optional[str] = Field(default=None, description="File path containing target (None if same file)") source_line: int = Field(..., ge=1, description="Line number where relationship occurs (1-based)") class AdditionalLocation(BaseModel): """A pointer to another location where a similar result was found. Used for grouping search results with similar scores and content, where the primary result is stored in SearchResult and secondary locations are stored in this model. """ path: str = Field(..., min_length=1) score: float = Field(..., ge=0.0) start_line: Optional[int] = Field(default=None, description="Start line of the result (1-based)") end_line: Optional[int] = Field(default=None, description="End line of the result (1-based)") symbol_name: Optional[str] = Field(default=None, description="Name of matched symbol") class SearchResult(BaseModel): """A unified search result for lexical or semantic search.""" path: str = Field(..., min_length=1) score: float = Field(..., ge=0.0) excerpt: Optional[str] = None content: Optional[str] = Field(default=None, description="Full content of matched code block") symbol: Optional[Symbol] = None chunk: Optional[SemanticChunk] = None metadata: Dict[str, Any] = Field(default_factory=dict) # Additional context for complete code blocks start_line: Optional[int] = Field(default=None, description="Start line of code block (1-based)") end_line: Optional[int] = Field(default=None, description="End line of code block (1-based)") symbol_name: Optional[str] = Field(default=None, description="Name of matched symbol/function/class") symbol_kind: Optional[str] = Field(default=None, description="Kind of symbol (function/class/method)") # Field for grouping similar results additional_locations: List["AdditionalLocation"] = Field( default_factory=list, description="Other locations for grouped results with similar scores and content." )