fix(entities): validate embeddings are non-zero vectors

Add an L2 norm check to SemanticChunk.validate_embedding that rejects
zero and near-zero vectors (norm below 1e-10). This prevents division
by zero in cosine similarity calculations downstream in vector search.

Solution-ID: SOL-20251228113612
Issue-ID: ISS-1766921318981-7
Task-ID: T1
This commit is contained in:
catlog22
2025-12-29 19:01:27 +08:00
parent 6a73d3c379
commit c859af1abf

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import math
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple
@@ -43,6 +44,10 @@ class SemanticChunk(BaseModel):
return value
if not value:
raise ValueError("embedding cannot be empty when provided")
norm = math.sqrt(sum(x * x for x in value))
epsilon = 1e-10
if norm < epsilon:
raise ValueError("embedding cannot be a zero vector")
return value
@@ -118,4 +123,3 @@ class SearchResult(BaseModel):
default_factory=list,
description="Other locations for grouped results with similar scores and content."
)