mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
129 lines
5.2 KiB
Python
129 lines
5.2 KiB
Python
"""Pydantic entity models for CodexLens."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
from enum import Enum
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
from pydantic import BaseModel, Field, field_validator
|
|
|
|
|
|
class Symbol(BaseModel):
    """A code symbol discovered in a file."""

    # Symbol identifier; must be a non-empty string.
    name: str = Field(..., min_length=1)
    # Symbol category label; any non-empty string is accepted here.
    kind: str = Field(..., min_length=1)
    # Line span of the symbol; checked by ``validate_range`` below.
    range: Tuple[int, int] = Field(
        ...,
        description="(start_line, end_line), 1-based inclusive",
    )
    # Optional location of the symbol; defaults to None when not supplied.
    file: Optional[str] = Field(
        default=None,
        description="Full path to the file containing this symbol",
    )

    @field_validator("range")
    @classmethod
    def validate_range(cls, value: Tuple[int, int]) -> Tuple[int, int]:
        """Check that ``range`` is an ordered pair of 1-based line numbers.

        Args:
            value: Candidate ``(start_line, end_line)`` pair.

        Returns:
            The same ``value``, unchanged, when every check passes.

        Raises:
            ValueError: If the pair does not have exactly two items, if
                either line number is below 1, or if the end line comes
                before the start line.
        """
        if len(value) != 2:
            raise ValueError("range must be a (start_line, end_line) tuple")

        first, last = value
        # The smaller of the two bounds decides whether both are >= 1.
        if min(first, last) < 1:
            raise ValueError("range lines must be >= 1")
        if first > last:
            raise ValueError("end_line must be >= start_line")
        return value
|
|
|
|
|
|
class SemanticChunk(BaseModel):
    """A semantically meaningful chunk of content, optionally embedded."""

    # Chunk text; must be a non-empty string.
    content: str = Field(..., min_length=1)
    # Optional vector; checked by ``validate_embedding`` when provided.
    embedding: Optional[List[float]] = Field(
        default=None,
        description="Vector embedding for semantic search",
    )
    # Free-form key/value data; each instance gets its own empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
    id: Optional[int] = Field(default=None, description="Database row ID")
    file_path: Optional[str] = Field(default=None, description="Source file path")

    @field_validator("embedding")
    @classmethod
    def validate_embedding(cls, value: Optional[List[float]]) -> Optional[List[float]]:
        """Reject embeddings that are empty, non-finite, or (near-)zero.

        Args:
            value: The embedding vector, or ``None`` when the chunk has not
                been embedded.

        Returns:
            ``value`` unchanged when it is ``None`` or passes every check.

        Raises:
            ValueError: If the list is empty, contains NaN or infinite
                components, or has a Euclidean norm below ``1e-10``.
        """
        if value is None:
            return value
        if not value:
            raise ValueError("embedding cannot be empty when provided")

        # NaN compares False against any threshold, so without this guard a
        # vector containing NaN would slip past the zero-norm check below.
        if not all(math.isfinite(component) for component in value):
            raise ValueError("embedding values must be finite")

        norm = math.sqrt(sum(x * x for x in value))
        # Tolerance rather than an exact == 0 test, so vectors that are zero
        # up to floating-point noise are rejected as well.
        epsilon = 1e-10
        if norm < epsilon:
            raise ValueError("embedding cannot be a zero vector")
        return value
|
|
|
|
|
|
class IndexedFile(BaseModel):
    """An indexed source file with symbols and optional semantic chunks."""

    # Both strings are stripped by the validator below and must not be blank.
    path: str = Field(..., min_length=1)
    language: str = Field(..., min_length=1)

    # Collections default to fresh empty lists for every instance.
    symbols: List[Symbol] = Field(default_factory=list)
    chunks: List[SemanticChunk] = Field(default_factory=list)
    # Quoted because CodeRelationship is declared further down the module.
    relationships: List["CodeRelationship"] = Field(default_factory=list)

    @field_validator("path", "language")
    @classmethod
    def strip_and_validate_nonempty(cls, value: str) -> str:
        """Trim surrounding whitespace and refuse whitespace-only values.

        Args:
            value: Raw ``path`` or ``language`` string.

        Returns:
            The value with leading and trailing whitespace removed.

        Raises:
            ValueError: If nothing is left after stripping.
        """
        trimmed = value.strip()
        if trimmed:
            return trimmed
        raise ValueError("value cannot be blank")
|
|
|
|
|
|
class RelationshipType(str, Enum):
    """Types of code relationships."""

    # Members are also ``str`` instances, so each one compares equal to its
    # plain string value (e.g. ``RelationshipType.CALL == "calls"``).
    CALL = "calls"
    INHERITS = "inherits"
    IMPORTS = "imports"
|
|
|
|
|
|
class CodeRelationship(BaseModel):
    """A relationship between code symbols (e.g., function calls, inheritance)."""

    # Endpoints of the relationship; both names must be non-empty.
    source_symbol: str = Field(
        ...,
        min_length=1,
        description="Name of source symbol",
    )
    target_symbol: str = Field(
        ...,
        min_length=1,
        description="Name of target symbol",
    )

    # Restricted to the members of RelationshipType.
    relationship_type: RelationshipType = Field(
        ...,
        description="Type of relationship (call, inherits, etc.)",
    )

    # Where the relationship was found. The source file is required; the
    # target file is optional and defaults to None.
    source_file: str = Field(
        ...,
        min_length=1,
        description="File path containing source symbol",
    )
    target_file: Optional[str] = Field(
        default=None,
        description="File path containing target (None if same file)",
    )
    # ge=1 rejects zero and negative line numbers.
    source_line: int = Field(
        ...,
        ge=1,
        description="Line number where relationship occurs (1-based)",
    )
|
|
|
|
|
|
class AdditionalLocation(BaseModel):
    """A pointer to another location where a similar result was found.

    Used for grouping search results with similar scores and content,
    where the primary result is stored in SearchResult and secondary
    locations are stored in this model.
    """

    # Required: a non-empty path and a non-negative score.
    path: str = Field(..., min_length=1)
    score: float = Field(..., ge=0.0)

    # Optional position details; each defaults to None. No range constraint
    # is declared on the line numbers in this model.
    start_line: Optional[int] = Field(
        default=None,
        description="Start line of the result (1-based)",
    )
    end_line: Optional[int] = Field(
        default=None,
        description="End line of the result (1-based)",
    )
    symbol_name: Optional[str] = Field(
        default=None,
        description="Name of matched symbol",
    )
|
|
|
|
|
|
class SearchResult(BaseModel):
    """A unified search result for lexical or semantic search."""

    # Required: a non-empty path and a non-negative score.
    path: str = Field(..., min_length=1)
    score: float = Field(..., ge=0.0)

    # Optional payload describing what matched; each defaults to None.
    excerpt: Optional[str] = None
    content: Optional[str] = Field(
        default=None,
        description="Full content of matched code block",
    )
    symbol: Optional[Symbol] = None
    chunk: Optional[SemanticChunk] = None
    # Free-form key/value data; each instance gets its own empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)

    # Additional context for complete code blocks
    start_line: Optional[int] = Field(
        default=None,
        description="Start line of code block (1-based)",
    )
    end_line: Optional[int] = Field(
        default=None,
        description="End line of code block (1-based)",
    )
    symbol_name: Optional[str] = Field(
        default=None,
        description="Name of matched symbol/function/class",
    )
    symbol_kind: Optional[str] = Field(
        default=None,
        description="Kind of symbol (function/class/method)",
    )

    # Field for grouping similar results; starts out as an empty list.
    additional_locations: List["AdditionalLocation"] = Field(
        default_factory=list,
        description="Other locations for grouped results with similar scores and content.",
    )
|