mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
fix(entities): validate embeddings are non-zero vectors
Add an L2-norm check to SemanticChunk.validate_embedding to reject zero vectors. This prevents division by zero in cosine-similarity calculations downstream in vector search. Solution-ID: SOL-20251228113612; Issue-ID: ISS-1766921318981-7; Task-ID: T1
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
@@ -43,6 +44,10 @@ class SemanticChunk(BaseModel):
|
||||
return value
|
||||
if not value:
|
||||
raise ValueError("embedding cannot be empty when provided")
|
||||
norm = math.sqrt(sum(x * x for x in value))
|
||||
epsilon = 1e-10
|
||||
if norm < epsilon:
|
||||
raise ValueError("embedding cannot be a zero vector")
|
||||
return value
|
||||
|
||||
|
||||
@@ -118,4 +123,3 @@ class SearchResult(BaseModel):
|
||||
default_factory=list,
|
||||
description="Other locations for grouped results with similar scores and content."
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user