mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-12 02:37:45 +08:00
Add comprehensive tests for semantic chunking and search functionality
- Implemented tests for the ChunkConfig and Chunker classes, covering default and custom configurations. - Added tests for symbol-based chunking, including single and multiple symbols, handling of empty symbols, and preservation of line numbers. - Developed tests for sliding window chunking, ensuring correct chunking behavior with various content sizes and configurations. - Created integration tests for semantic search, validating embedding generation, vector storage, and search accuracy across a complex codebase. - Included performance tests for embedding generation and search operations. - Established tests for chunking strategies, comparing symbol-based and sliding window approaches. - Enhanced test coverage for edge cases, including handling of unicode characters and out-of-bounds symbol ranges.
This commit is contained in:
@@ -119,6 +119,7 @@ class VectorStore:
|
||||
query_embedding: List[float],
|
||||
top_k: int = 10,
|
||||
min_score: float = 0.0,
|
||||
return_full_content: bool = True,
|
||||
) -> List[SearchResult]:
|
||||
"""Find chunks most similar to query embedding.
|
||||
|
||||
@@ -126,6 +127,7 @@ class VectorStore:
|
||||
query_embedding: Query vector.
|
||||
top_k: Maximum results to return.
|
||||
min_score: Minimum similarity score (0-1).
|
||||
return_full_content: If True, return full code block content.
|
||||
|
||||
Returns:
|
||||
List of SearchResult ordered by similarity (highest first).
|
||||
@@ -144,14 +146,39 @@ class VectorStore:
|
||||
if score >= min_score:
|
||||
metadata = json.loads(metadata_json) if metadata_json else {}
|
||||
|
||||
# Build excerpt
|
||||
# Build excerpt (short preview)
|
||||
excerpt = content[:200] + "..." if len(content) > 200 else content
|
||||
|
||||
# Extract symbol information from metadata
|
||||
symbol_name = metadata.get("symbol_name")
|
||||
symbol_kind = metadata.get("symbol_kind")
|
||||
start_line = metadata.get("start_line")
|
||||
end_line = metadata.get("end_line")
|
||||
|
||||
# Build Symbol object if we have symbol info
|
||||
symbol = None
|
||||
if symbol_name and symbol_kind and start_line and end_line:
|
||||
try:
|
||||
from codexlens.entities import Symbol
|
||||
symbol = Symbol(
|
||||
name=symbol_name,
|
||||
kind=symbol_kind,
|
||||
range=(start_line, end_line)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
results.append((score, SearchResult(
|
||||
path=file_path,
|
||||
score=score,
|
||||
excerpt=excerpt,
|
||||
symbol=None,
|
||||
content=content if return_full_content else None,
|
||||
symbol=symbol,
|
||||
metadata=metadata,
|
||||
start_line=start_line,
|
||||
end_line=end_line,
|
||||
symbol_name=symbol_name,
|
||||
symbol_kind=symbol_kind,
|
||||
)))
|
||||
|
||||
# Sort by score descending
|
||||
|
||||
Reference in New Issue
Block a user