mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-12 02:37:45 +08:00
fix(entities): validate embeddings are non-zero vectors
Add an L2-norm check to SemanticChunk.validate_embedding that rejects zero vectors (norm below 1e-10). This prevents division by zero in downstream cosine-similarity calculations in vector search. Solution-ID: SOL-20251228113612; Issue-ID: ISS-1766921318981-7; Task-ID: T1
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
@@ -43,6 +44,10 @@ class SemanticChunk(BaseModel):
|
|||||||
return value
|
return value
|
||||||
if not value:
|
if not value:
|
||||||
raise ValueError("embedding cannot be empty when provided")
|
raise ValueError("embedding cannot be empty when provided")
|
||||||
|
norm = math.sqrt(sum(x * x for x in value))
|
||||||
|
epsilon = 1e-10
|
||||||
|
if norm < epsilon:
|
||||||
|
raise ValueError("embedding cannot be a zero vector")
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
@@ -118,4 +123,3 @@ class SearchResult(BaseModel):
|
|||||||
default_factory=list,
|
default_factory=list,
|
||||||
description="Other locations for grouped results with similar scores and content."
|
description="Other locations for grouped results with similar scores and content."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user