From c859af1abf3f29ea211493d5afc6a1c2a15790e5 Mon Sep 17 00:00:00 2001
From: catlog22
Date: Mon, 29 Dec 2025 19:01:27 +0800
Subject: [PATCH] fix(entities): validate embeddings are non-zero vectors

Add L2 norm check to SemanticChunk.validate_embedding to reject zero
vectors. Prevents division by zero in cosine similarity calculations
downstream in vector search.

Solution-ID: SOL-20251228113612
Issue-ID: ISS-1766921318981-7
Task-ID: T1
---
 codex-lens/src/codexlens/entities.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/codex-lens/src/codexlens/entities.py b/codex-lens/src/codexlens/entities.py
index 08e51f54..2e1477d0 100644
--- a/codex-lens/src/codexlens/entities.py
+++ b/codex-lens/src/codexlens/entities.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import math
 from enum import Enum
 from typing import Any, Dict, List, Optional, Tuple
 
@@ -43,6 +44,10 @@ class SemanticChunk(BaseModel):
             return value
         if not value:
             raise ValueError("embedding cannot be empty when provided")
+        norm = math.sqrt(sum(x * x for x in value))
+        epsilon = 1e-10
+        if norm < epsilon:
+            raise ValueError("embedding cannot be a zero vector")
         return value
 
 
@@ -118,4 +123,3 @@ class SearchResult(BaseModel):
         default_factory=list,
         description="Other locations for grouped results with similar scores and content."
     )
-