Add comprehensive tests for vector/semantic search functionality

- Implement full coverage tests for Embedder model loading and embedding generation
- Add CRUD operations and caching tests for VectorStore
- Include cosine similarity computation tests
- Validate semantic search accuracy and relevance through various queries
- Establish performance benchmarks for embedding and search operations
- Ensure edge cases and error handling are covered
- Test thread safety and concurrent access scenarios
- Verify availability of semantic search dependencies
This commit is contained in:
catlog22
2025-12-14 17:17:09 +08:00
parent 8d542b8e45
commit 79a2953862
47 changed files with 11208 additions and 4336 deletions

View File

@@ -0,0 +1,831 @@
"""Tests for LLM-based semantic enhancement functionality.
Tests cover:
- LLMConfig and data classes
- LLMEnhancer initialization and configuration
- Prompt building and JSON parsing
- Batch processing logic
- CCW CLI invocation (mocked)
- EnhancedSemanticIndexer integration
- Error handling and fallback behavior
"""
import json
import tempfile
from pathlib import Path
from typing import Dict, Any
from unittest.mock import MagicMock, patch, PropertyMock
import pytest
from codexlens.entities import SemanticChunk, Symbol
from codexlens.semantic.llm_enhancer import (
SemanticMetadata,
FileData,
LLMConfig,
LLMEnhancer,
EnhancedSemanticIndexer,
create_enhancer,
create_enhanced_indexer,
)
# === Data Class Tests ===
class TestSemanticMetadata:
    """Unit tests covering the SemanticMetadata dataclass."""

    def test_basic_creation(self):
        """Required fields populate; optional provenance fields default to None."""
        meta = SemanticMetadata(
            summary="Authentication handler",
            keywords=["auth", "login", "jwt"],
            purpose="auth",
        )
        assert meta.summary == "Authentication handler"
        assert meta.keywords == ["auth", "login", "jwt"]
        assert meta.purpose == "auth"
        # None of the optional fields should be set when omitted.
        assert meta.file_path is None
        assert meta.symbol_name is None
        assert meta.llm_tool is None

    def test_full_creation(self):
        """All fields, including optional provenance, round-trip unchanged."""
        fields = dict(
            summary="User login function",
            keywords=["login", "user"],
            purpose="auth",
            file_path="/test/auth.py",
            symbol_name="login",
            llm_tool="gemini",
        )
        meta = SemanticMetadata(**fields)
        assert meta.file_path == "/test/auth.py"
        assert meta.symbol_name == "login"
        assert meta.llm_tool == "gemini"

    def test_empty_keywords(self):
        """An empty keyword list is stored as-is, not normalised away."""
        meta = SemanticMetadata(summary="Empty", keywords=[], purpose="")
        assert meta.keywords == []
class TestFileData:
    """Unit tests covering the FileData dataclass."""

    def test_basic_creation(self):
        """Required fields populate; symbols defaults to an empty list."""
        fd = FileData(
            path="/test/file.py",
            content="def hello(): pass",
            language="python",
        )
        assert fd.path == "/test/file.py"
        assert fd.content == "def hello(): pass"
        assert fd.language == "python"
        assert fd.symbols == []

    def test_with_symbols(self):
        """An explicit symbol list is stored untouched and in order."""
        provided = [
            Symbol(name="hello", kind="function", range=(1, 1)),
            Symbol(name="MyClass", kind="class", range=(3, 10)),
        ]
        fd = FileData(
            path="/test/file.py",
            content="code",
            language="python",
            symbols=provided,
        )
        assert len(fd.symbols) == 2
        assert fd.symbols[0].name == "hello"
class TestLLMConfig:
    """Unit tests covering the LLMConfig dataclass."""

    def test_default_values(self):
        """Defaults match the documented configuration."""
        cfg = LLMConfig()
        expected = {
            "tool": "gemini",
            "fallback_tool": "qwen",
            "timeout_ms": 300000,
            "batch_size": 5,
            "max_content_chars": 8000,
            "enabled": True,
        }
        for name, value in expected.items():
            assert getattr(cfg, name) == value

    def test_custom_values(self):
        """Every field can be overridden at construction time."""
        cfg = LLMConfig(
            tool="qwen",
            fallback_tool="gemini",
            timeout_ms=600000,
            batch_size=10,
            max_content_chars=4000,
            enabled=False,
        )
        assert cfg.tool == "qwen"
        assert cfg.fallback_tool == "gemini"
        assert cfg.timeout_ms == 600000
        assert cfg.batch_size == 10
        assert cfg.max_content_chars == 4000
        assert cfg.enabled is False

    @patch.dict("os.environ", {"CCW_CLI_SECONDARY_TOOL": "codex", "CCW_CLI_FALLBACK_TOOL": "gemini"})
    def test_env_override(self):
        """CCW_CLI_* environment variables override the tool defaults."""
        cfg = LLMConfig()
        assert cfg.tool == "codex"
        assert cfg.fallback_tool == "gemini"
# === LLMEnhancer Tests ===
class TestLLMEnhancerInit:
    """Initialization behaviour of LLMEnhancer."""

    def test_default_init(self):
        """A bare enhancer gets a default config and no cached availability."""
        enh = LLMEnhancer()
        assert enh.config is not None
        assert enh.config.tool == "gemini"
        # Availability probe has not run yet.
        assert enh._ccw_available is None

    def test_custom_config(self):
        """A supplied config object is adopted verbatim."""
        enh = LLMEnhancer(LLMConfig(tool="qwen", batch_size=3))
        assert enh.config.tool == "qwen"
        assert enh.config.batch_size == 3
class TestLLMEnhancerAvailability:
    """Availability probing for the ccw CLI binary."""

    @patch("shutil.which")
    def test_ccw_available(self, mock_which):
        """A resolvable binary reports True and caches the result."""
        mock_which.return_value = "/usr/bin/ccw"
        enh = LLMEnhancer()
        assert enh.check_available() is True
        assert enh._ccw_available is True
        mock_which.assert_called_with("ccw")

    @patch("shutil.which")
    def test_ccw_not_available(self, mock_which):
        """A missing binary reports False and caches the result."""
        mock_which.return_value = None
        enh = LLMEnhancer()
        assert enh.check_available() is False
        assert enh._ccw_available is False

    @patch("shutil.which")
    def test_ccw_availability_cached(self, mock_which):
        """Repeated probes reuse the first answer instead of re-resolving."""
        mock_which.return_value = "/usr/bin/ccw"
        enh = LLMEnhancer()
        enh.check_available()
        enh.check_available()
        # shutil.which must have run exactly once despite two probes.
        mock_which.assert_called_once()
class TestPromptBuilding:
    """Construction of the batch prompt sent to the LLM."""

    def test_build_single_file_prompt(self):
        """One file yields a prompt with header, code fence, and schema cues."""
        enh = LLMEnhancer()
        prompt = enh._build_batch_prompt([
            FileData(
                path="/test/auth.py",
                content="def login(): pass",
                language="python",
            )
        ])
        for fragment in (
            "[FILE: /test/auth.py]",
            "```python",
            "def login(): pass",
            "PURPOSE:",
            "JSON format output",
        ):
            assert fragment in prompt

    def test_build_multiple_files_prompt(self):
        """Each file contributes its own header and language-tagged fence."""
        enh = LLMEnhancer()
        prompt = enh._build_batch_prompt([
            FileData(path="/test/a.py", content="def a(): pass", language="python"),
            FileData(path="/test/b.js", content="function b() {}", language="javascript"),
        ])
        for fragment in (
            "[FILE: /test/a.py]",
            "[FILE: /test/b.js]",
            "```python",
            "```javascript",
        ):
            assert fragment in prompt

    def test_build_prompt_truncates_long_content(self):
        """Content beyond max_content_chars is cut and explicitly marked."""
        enh = LLMEnhancer(LLMConfig(max_content_chars=100))
        oversized = "x" * 200
        prompt = enh._build_batch_prompt(
            [FileData(path="/test/long.py", content=oversized, language="python")]
        )
        assert "... [truncated]" in prompt
        assert "x" * 200 not in prompt
class TestJSONParsing:
    """Parsing of LLM JSON responses into SemanticMetadata records."""

    def test_parse_valid_response(self):
        """A well-formed payload maps file paths to populated metadata."""
        enh = LLMEnhancer()
        payload = {
            "files": {
                "/test/auth.py": {
                    "summary": "Authentication handler",
                    "keywords": ["auth", "login"],
                    "purpose": "auth",
                }
            }
        }
        parsed = enh._parse_response(json.dumps(payload), "gemini")
        assert "/test/auth.py" in parsed
        entry = parsed["/test/auth.py"]
        assert entry.summary == "Authentication handler"
        assert entry.keywords == ["auth", "login"]
        assert entry.purpose == "auth"
        # The tool that produced the response is recorded on each entry.
        assert entry.llm_tool == "gemini"

    def test_parse_response_with_markdown(self):
        """A fenced-markdown wrapper is stripped before JSON decoding."""
        enh = LLMEnhancer()
        wrapped = '''```json
{
"files": {
"/test/file.py": {
"summary": "Test file",
"keywords": ["test"],
"purpose": "test"
}
}
}
```'''
        parsed = enh._parse_response(wrapped, "qwen")
        assert "/test/file.py" in parsed
        assert parsed["/test/file.py"].summary == "Test file"

    def test_parse_response_multiple_files(self):
        """Every file entry in the payload becomes a result entry."""
        enh = LLMEnhancer()
        payload = {
            "files": {
                "/test/a.py": {"summary": "File A", "keywords": ["a"], "purpose": "util"},
                "/test/b.py": {"summary": "File B", "keywords": ["b"], "purpose": "api"},
            }
        }
        parsed = enh._parse_response(json.dumps(payload), "gemini")
        assert len(parsed) == 2
        assert parsed["/test/a.py"].summary == "File A"
        assert parsed["/test/b.py"].summary == "File B"

    def test_parse_invalid_json(self):
        """Undecodable text degrades to an empty mapping, not an exception."""
        enh = LLMEnhancer()
        assert enh._parse_response("not valid json at all", "gemini") == {}

    def test_parse_empty_response(self):
        """An empty response likewise yields an empty mapping."""
        enh = LLMEnhancer()
        assert enh._parse_response("", "gemini") == {}
class TestJSONExtraction:
    """Extraction of a JSON object embedded in arbitrary response text."""

    def test_extract_json_from_plain(self):
        """Bare JSON is returned unchanged."""
        enh = LLMEnhancer()
        assert enh._extract_json('{"key": "value"}') == '{"key": "value"}'

    def test_extract_json_from_markdown(self):
        """JSON inside a fenced code block is unwrapped."""
        enh = LLMEnhancer()
        fenced = '''```json
{"key": "value"}
```'''
        assert enh._extract_json(fenced) == '{"key": "value"}'

    def test_extract_json_with_surrounding_text(self):
        """Leading and trailing prose around the object is discarded."""
        enh = LLMEnhancer()
        noisy = 'Here is the result: {"key": "value"} That is all.'
        assert enh._extract_json(noisy) == '{"key": "value"}'

    def test_extract_nested_json(self):
        """Nested braces do not confuse the extractor."""
        enh = LLMEnhancer()
        extracted = enh._extract_json('{"outer": {"inner": "value"}}')
        assert '"outer"' in extracted
        assert '"inner"' in extracted

    def test_extract_no_json(self):
        """Text containing no object at all yields None."""
        enh = LLMEnhancer()
        assert enh._extract_json("No JSON here at all") is None

    def test_extract_malformed_json(self):
        """A truncated object is rejected rather than 'repaired'."""
        enh = LLMEnhancer()
        # Missing closing brace
        assert enh._extract_json('{"key": "value"') is None
class TestEnhanceFiles:
    """Tests for enhance_files method.

    NOTE: stacked ``patch.object`` decorators apply bottom-up, so the
    bottom-most patch is always the first mock argument after ``self``.
    """

    @patch.object(LLMEnhancer, "check_available", return_value=False)
    def test_enhance_files_ccw_not_available(self, mock_check):
        """Test enhance_files returns empty when CCW not available."""
        enhancer = LLMEnhancer()
        files = [FileData(path="/test/a.py", content="code", language="python")]
        result = enhancer.enhance_files(files)
        # Without the CLI, no enhancement is attempted at all.
        assert result == {}

    def test_enhance_files_disabled(self):
        """Test enhance_files returns empty when disabled."""
        config = LLMConfig(enabled=False)
        enhancer = LLMEnhancer(config)
        files = [FileData(path="/test/a.py", content="code", language="python")]
        result = enhancer.enhance_files(files)
        # The enabled flag short-circuits before any availability check.
        assert result == {}

    @patch.object(LLMEnhancer, "check_available", return_value=True)
    def test_enhance_files_empty_list(self, mock_check):
        """Test enhance_files with empty list returns empty dict."""
        enhancer = LLMEnhancer()
        result = enhancer.enhance_files([])
        assert result == {}

    @patch.object(LLMEnhancer, "check_available", return_value=True)
    @patch.object(LLMEnhancer, "_invoke_ccw_cli")
    def test_enhance_files_success(self, mock_invoke, mock_check):
        """Test enhance_files successful processing."""
        # Simulate one successful CLI round-trip returning metadata JSON.
        mock_invoke.return_value = {
            "success": True,
            "stdout": json.dumps({
                "files": {
                    "/test/auth.py": {
                        "summary": "Auth module",
                        "keywords": ["auth"],
                        "purpose": "auth",
                    }
                }
            }),
            "stderr": "",
            "exit_code": 0,
        }
        enhancer = LLMEnhancer()
        files = [FileData(path="/test/auth.py", content="def login(): pass", language="python")]
        result = enhancer.enhance_files(files)
        assert "/test/auth.py" in result
        assert result["/test/auth.py"].summary == "Auth module"

    @patch.object(LLMEnhancer, "check_available", return_value=True)
    @patch.object(LLMEnhancer, "_invoke_ccw_cli")
    def test_enhance_files_fallback(self, mock_invoke, mock_check):
        """Test enhance_files falls back to secondary tool."""
        # First call fails, second succeeds
        mock_invoke.side_effect = [
            {"success": False, "stdout": "", "stderr": "error", "exit_code": 1},
            {
                "success": True,
                "stdout": json.dumps({
                    "files": {
                        "/test/file.py": {
                            "summary": "Fallback result",
                            "keywords": ["fallback"],
                            "purpose": "util",
                        }
                    }
                }),
                "stderr": "",
                "exit_code": 0,
            },
        ]
        enhancer = LLMEnhancer()
        files = [FileData(path="/test/file.py", content="code", language="python")]
        result = enhancer.enhance_files(files)
        assert "/test/file.py" in result
        assert result["/test/file.py"].summary == "Fallback result"
        # Two invocations prove the fallback tool was actually tried.
        assert mock_invoke.call_count == 2
class TestEnhanceFile:
    """Single-file convenience wrapper around enhance_files."""

    @patch.object(LLMEnhancer, "enhance_files")
    def test_enhance_file_success(self, mock_enhance_files):
        """Metadata produced by the batch API is returned for the one file."""
        mock_enhance_files.return_value = {
            "/test/auth.py": SemanticMetadata(
                summary="Auth module",
                keywords=["auth", "login"],
                purpose="auth",
                file_path="/test/auth.py",
                llm_tool="gemini",
            )
        }
        meta = LLMEnhancer().enhance_file("/test/auth.py", "def login(): pass", "python")
        assert meta.summary == "Auth module"
        assert meta.keywords == ["auth", "login"]

    @patch.object(LLMEnhancer, "enhance_files")
    def test_enhance_file_fallback_on_failure(self, mock_enhance_files):
        """When the batch API yields nothing, a language-based default is built."""
        mock_enhance_files.return_value = {}  # Enhancement failed
        meta = LLMEnhancer().enhance_file("/test/file.py", "code", "python")
        assert "python" in meta.summary.lower()
        assert "python" in meta.keywords
        assert meta.purpose == "unknown"
class TestBatchProcessing:
    """Tests for batch processing.

    NOTE: stacked ``patch.object`` decorators apply bottom-up, so
    ``mock_process`` (the bottom patch) is the first mock argument.
    """

    @patch.object(LLMEnhancer, "check_available", return_value=True)
    @patch.object(LLMEnhancer, "_process_batch")
    def test_batch_processing(self, mock_process, mock_check):
        """Test files are processed in batches."""
        # _process_batch is stubbed out; we only count how often it runs.
        mock_process.return_value = {}
        config = LLMConfig(batch_size=2)
        enhancer = LLMEnhancer(config)
        files = [
            FileData(path=f"/test/file{i}.py", content="code", language="python")
            for i in range(5)
        ]
        enhancer.enhance_files(files)
        # 5 files with batch_size=2 should result in 3 batches
        assert mock_process.call_count == 3

    @patch.object(LLMEnhancer, "check_available", return_value=True)
    @patch.object(LLMEnhancer, "_process_batch")
    def test_batch_continues_on_error(self, mock_process, mock_check):
        """Test batch processing continues on error."""
        # First batch fails, second succeeds
        mock_process.side_effect = [
            Exception("Batch 1 failed"),
            {"/test/file2.py": SemanticMetadata(summary="OK", keywords=[], purpose="")},
        ]
        # batch_size=1 guarantees each file lands in its own batch.
        config = LLMConfig(batch_size=1)
        enhancer = LLMEnhancer(config)
        files = [
            FileData(path="/test/file1.py", content="code", language="python"),
            FileData(path="/test/file2.py", content="code", language="python"),
        ]
        result = enhancer.enhance_files(files)
        # Should still get results from second batch
        assert "/test/file2.py" in result
# === CCW CLI Invocation Tests ===
class TestCCWInvocation:
    """Tests for CCW CLI invocation.

    NOTE: decorators apply bottom-up — ``mock_which`` (bottom) is the first
    mock argument, ``mock_run`` the second.
    """

    @patch("subprocess.run")
    @patch("shutil.which", return_value="/usr/bin/ccw")
    def test_invoke_success(self, mock_which, mock_run):
        """Test successful CCW CLI invocation."""
        mock_run.return_value = MagicMock(
            returncode=0,
            stdout='{"files": {}}',
            stderr="",
        )
        enhancer = LLMEnhancer()
        result = enhancer._invoke_ccw_cli("test prompt", tool="gemini")
        assert result["success"] is True
        assert result["exit_code"] == 0

    @patch("subprocess.run")
    @patch("shutil.which", return_value="/usr/bin/ccw")
    def test_invoke_failure(self, mock_which, mock_run):
        """Test failed CCW CLI invocation."""
        # Non-zero exit code should be surfaced as success=False.
        mock_run.return_value = MagicMock(
            returncode=1,
            stdout="",
            stderr="Error occurred",
        )
        enhancer = LLMEnhancer()
        result = enhancer._invoke_ccw_cli("test prompt", tool="gemini")
        assert result["success"] is False
        assert result["exit_code"] == 1

    @patch("subprocess.run")
    @patch("shutil.which", return_value="/usr/bin/ccw")
    def test_invoke_timeout(self, mock_which, mock_run):
        """Test CCW CLI timeout handling."""
        import subprocess
        # TimeoutExpired must be caught and reported, not propagated.
        mock_run.side_effect = subprocess.TimeoutExpired(cmd="ccw", timeout=300)
        enhancer = LLMEnhancer()
        result = enhancer._invoke_ccw_cli("test prompt", tool="gemini")
        assert result["success"] is False
        assert "timeout" in result["stderr"]

    @patch("subprocess.run")
    @patch("shutil.which", return_value=None)
    def test_invoke_ccw_not_found(self, mock_which, mock_run):
        """Test CCW CLI not found handling."""
        mock_run.side_effect = FileNotFoundError()
        enhancer = LLMEnhancer()
        result = enhancer._invoke_ccw_cli("test prompt", tool="gemini")
        assert result["success"] is False
        assert "not found" in result["stderr"]
# === EnhancedSemanticIndexer Tests ===
class TestEnhancedSemanticIndexer:
    """Tests for EnhancedSemanticIndexer integration.

    All three collaborators (enhancer, embedder, vector store) are mocked
    so these tests exercise only the indexer's orchestration logic.
    """

    @pytest.fixture
    def mock_enhancer(self):
        """Create mock LLM enhancer that returns one enhanced file."""
        enhancer = MagicMock(spec=LLMEnhancer)
        enhancer.enhance_files.return_value = {
            "/test/auth.py": SemanticMetadata(
                summary="Authentication handler",
                keywords=["auth", "login", "jwt"],
                purpose="auth",
                file_path="/test/auth.py",
                llm_tool="gemini",
            )
        }
        return enhancer

    @pytest.fixture
    def mock_embedder(self):
        """Create mock embedder producing fixed 384-dim vectors."""
        embedder = MagicMock()
        embedder.embed.return_value = [[0.1] * 384]
        embedder.embed_single.return_value = [0.1] * 384
        return embedder

    @pytest.fixture
    def mock_vector_store(self):
        """Create mock vector store whose add_chunk reports id 1."""
        store = MagicMock()
        store.add_chunk.return_value = 1
        return store

    def test_index_files_empty_list(self, mock_enhancer, mock_embedder, mock_vector_store):
        """Test indexing empty file list."""
        indexer = EnhancedSemanticIndexer(mock_enhancer, mock_embedder, mock_vector_store)
        result = indexer.index_files([])
        assert result == 0
        # No files means the LLM must never be invoked.
        mock_enhancer.enhance_files.assert_not_called()

    def test_index_files_with_llm_enhancement(self, mock_enhancer, mock_embedder, mock_vector_store):
        """Test indexing with LLM enhancement."""
        indexer = EnhancedSemanticIndexer(mock_enhancer, mock_embedder, mock_vector_store)
        files = [FileData(path="/test/auth.py", content="def login(): pass", language="python")]
        result = indexer.index_files(files)
        assert result == 1
        # Full pipeline: enhance -> embed -> persist, one call each.
        mock_enhancer.enhance_files.assert_called_once()
        mock_embedder.embed.assert_called_once()
        mock_vector_store.add_chunk.assert_called_once()

    def test_index_files_fallback_to_raw_code(self, mock_embedder, mock_vector_store):
        """Test indexing falls back to raw code when LLM fails."""
        mock_enhancer = MagicMock(spec=LLMEnhancer)
        mock_enhancer.enhance_files.return_value = {}  # No enhancement
        indexer = EnhancedSemanticIndexer(mock_enhancer, mock_embedder, mock_vector_store)
        files = [FileData(path="/test/file.py", content="code", language="python")]
        result = indexer.index_files(files)
        assert result == 1
        # Fallback path embeds the raw source via embed_single.
        mock_embedder.embed_single.assert_called()

    def test_create_embeddable_text(self, mock_enhancer, mock_embedder, mock_vector_store):
        """Test embeddable text creation."""
        indexer = EnhancedSemanticIndexer(mock_enhancer, mock_embedder, mock_vector_store)
        metadata = SemanticMetadata(
            summary="Handles user authentication",
            keywords=["auth", "login", "user"],
            purpose="auth",
        )
        file_data = FileData(path="/test/auth.py", content="code", language="python")
        text = indexer._create_embeddable_text(metadata, file_data)
        # The embeddable text combines summary, keywords, and file name.
        assert "Handles user authentication" in text
        assert "auth" in text.lower()
        assert "Keywords:" in text
        assert "auth.py" in text
# === Factory Function Tests ===
class TestFactoryFunctions:
    """Tests for factory functions."""

    def test_create_enhancer_default(self):
        """create_enhancer with no arguments yields the default config."""
        enhancer = create_enhancer()
        assert enhancer.config.tool == "gemini"
        assert enhancer.config.enabled is True

    def test_create_enhancer_custom(self):
        """create_enhancer forwards custom parameters into the config."""
        enhancer = create_enhancer(
            tool="qwen",
            timeout_ms=600000,
            batch_size=10,
            enabled=False,
        )
        assert enhancer.config.tool == "qwen"
        assert enhancer.config.timeout_ms == 600000
        assert enhancer.config.batch_size == 10
        assert enhancer.config.enabled is False

    # FIX: the previous @pytest.mark.skipif decorator wrapped
    # pytest.importorskip(...), which runs at class-definition time: on
    # import failure it raises Skipped during module import (skipping the
    # ENTIRE module, not just this test), and on success it returns the
    # module object, so `not module` is always False and the skipif never
    # fires. The in-body guard below is the correct, test-scoped skip, so
    # the decorator is simply removed.
    def test_create_enhanced_indexer(self, tmp_path):
        """create_enhanced_indexer wires enhancer, embedder, and store."""
        try:
            from codexlens.semantic import SEMANTIC_AVAILABLE
            if not SEMANTIC_AVAILABLE:
                pytest.skip("Semantic dependencies not installed")
            db_path = tmp_path / "semantic.db"
            indexer = create_enhanced_indexer(db_path, llm_tool="gemini", llm_enabled=False)
            assert indexer.enhancer is not None
            assert indexer.embedder is not None
            assert indexer.vector_store is not None
        except ImportError:
            pytest.skip("Semantic dependencies not installed")
# === Edge Cases ===
class TestEdgeCases:
    """Edge cases around unusual content and partial responses."""

    def test_semantic_metadata_with_special_chars(self):
        """Quote characters inside summary and keywords survive unchanged."""
        meta = SemanticMetadata(
            summary='Test "quoted" and \'single\' quotes',
            keywords=["special", "chars", "test's"],
            purpose="test",
        )
        assert '"quoted"' in meta.summary
        assert "test's" in meta.keywords

    def test_file_data_with_unicode(self):
        """Non-ASCII paths and content are stored verbatim."""
        fd = FileData(
            path="/test/中文.py",
            content="def 你好(): return '世界'",
            language="python",
        )
        assert "中文" in fd.path
        assert "你好" in fd.content

    @patch.object(LLMEnhancer, "check_available", return_value=True)
    @patch.object(LLMEnhancer, "_invoke_ccw_cli")
    def test_enhance_with_very_long_content(self, mock_invoke, mock_check):
        """Oversized content is truncated into the prompt rather than crashing."""
        mock_invoke.return_value = {
            "success": True,
            "stdout": json.dumps({"files": {}}),
            "stderr": "",
            "exit_code": 0,
        }
        enh = LLMEnhancer(LLMConfig(max_content_chars=100))
        enh.enhance_files(
            [FileData(path="/test/long.py", content="x" * 10000, language="python")]
        )
        # A single clean invocation confirms the oversized input was handled.
        mock_invoke.assert_called_once()

    def test_parse_response_with_missing_fields(self):
        """Absent keywords/purpose fields fall back to empty values."""
        enh = LLMEnhancer()
        raw = json.dumps(
            {"files": {"/test/file.py": {"summary": "Only summary provided"}}}
        )
        parsed = enh._parse_response(raw, "gemini")
        assert "/test/file.py" in parsed
        entry = parsed["/test/file.py"]
        assert entry.summary == "Only summary provided"
        assert entry.keywords == []
        assert entry.purpose == ""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,747 @@
"""Full coverage tests for vector/semantic search functionality.
Tests cover:
- Embedder model loading and embedding generation
- VectorStore CRUD operations and caching
- Cosine similarity computation
- Semantic search accuracy and relevance
- Performance benchmarks
- Edge cases and error handling
- Thread safety and concurrent access
"""
import json
import tempfile
import threading
import time
from pathlib import Path
from typing import List
import pytest
from codexlens.entities import SemanticChunk, Symbol, SearchResult
from codexlens.semantic import SEMANTIC_AVAILABLE, check_semantic_available
# Skip all tests if semantic dependencies not available
pytestmark = pytest.mark.skipif(
not SEMANTIC_AVAILABLE,
reason="Semantic search dependencies not installed (pip install codexlens[semantic])"
)
# === Fixtures ===
@pytest.fixture
def temp_db(tmp_path):
    """Provide the path for a throwaway per-test semantic database."""
    db_file = tmp_path / "test_semantic.db"
    return db_file
@pytest.fixture
def embedder():
    """Provide a fresh Embedder (its model loads lazily on first use)."""
    from codexlens.semantic.embedder import Embedder
    instance = Embedder()
    return instance
@pytest.fixture
def vector_store(temp_db):
    """Provide a VectorStore backed by the per-test database file."""
    from codexlens.semantic.vector_store import VectorStore
    store = VectorStore(temp_db)
    return store
@pytest.fixture
def sample_code_chunks():
    """Sample code chunks for testing.

    Each entry mimics an indexed chunk: raw source text plus the
    symbol/location metadata stored alongside it. The mix of Python and
    JavaScript, auth-, DB-, and math-related snippets lets relevance
    tests distinguish topically different content.
    """
    return [
        {
            "content": "def authenticate(username, password): return check_credentials(username, password)",
            "metadata": {"symbol_name": "authenticate", "symbol_kind": "function", "start_line": 1, "end_line": 1, "language": "python"},
        },
        {
            "content": "class DatabaseConnection:\n    def connect(self, host, port): pass\n    def execute(self, query): pass",
            "metadata": {"symbol_name": "DatabaseConnection", "symbol_kind": "class", "start_line": 1, "end_line": 3, "language": "python"},
        },
        {
            "content": "async function fetchUserData(userId) { return await api.get('/users/' + userId); }",
            "metadata": {"symbol_name": "fetchUserData", "symbol_kind": "function", "start_line": 1, "end_line": 1, "language": "javascript"},
        },
        {
            "content": "def calculate_sum(numbers): return sum(numbers)",
            "metadata": {"symbol_name": "calculate_sum", "symbol_kind": "function", "start_line": 1, "end_line": 1, "language": "python"},
        },
        {
            "content": "class UserProfile:\n    def __init__(self, name, email):\n        self.name = name\n        self.email = email",
            "metadata": {"symbol_name": "UserProfile", "symbol_kind": "class", "start_line": 1, "end_line": 4, "language": "python"},
        },
    ]
# === Embedder Tests ===
class TestEmbedder:
    """Embedding generation and model-loading behaviour of Embedder."""

    def test_embedder_initialization(self, embedder):
        """Model name, dimension constant, and lazy-load state are as expected."""
        assert embedder.model_name == "BAAI/bge-small-en-v1.5"
        assert embedder.EMBEDDING_DIM == 384
        assert embedder._model is None  # model not loaded until first embed

    def test_embed_single_returns_correct_dimension(self, embedder):
        """A single embedding is a 384-element list of floats."""
        vec = embedder.embed_single("def hello(): print('world')")
        assert isinstance(vec, list)
        assert len(vec) == 384
        assert all(isinstance(component, float) for component in vec)

    def test_embed_batch_returns_correct_count(self, embedder):
        """Batch embedding yields one 384-dim vector per input text."""
        snippets = [
            "def foo(): pass",
            "def bar(): pass",
            "def baz(): pass",
        ]
        vectors = embedder.embed(snippets)
        assert len(vectors) == len(snippets)
        assert all(len(vec) == 384 for vec in vectors)

    def test_embed_empty_string(self, embedder):
        """The empty string still produces a full-width vector."""
        assert len(embedder.embed_single("")) == 384

    def test_embed_unicode_text(self, embedder):
        """Non-ASCII source text embeds without error."""
        assert len(embedder.embed_single("def 你好(): return '世界'")) == 384

    def test_embed_long_text(self, embedder):
        """Long repeated input still yields a fixed-width vector."""
        repeated = "def process(): pass\n" * 100
        assert len(embedder.embed_single(repeated)) == 384

    def test_embed_special_characters(self, embedder):
        """Punctuation-heavy code embeds without error."""
        snippet = "def test(): return {'key': 'value', '@decorator': True}"
        assert len(embedder.embed_single(snippet)) == 384

    def test_lazy_model_loading(self, embedder):
        """The model attribute is populated only after the first embed call."""
        assert embedder._model is None
        embedder.embed_single("test")
        assert embedder._model is not None

    def test_model_reuse(self, embedder):
        """Subsequent calls reuse the already-loaded model object."""
        embedder.embed_single("test1")
        loaded = embedder._model
        embedder.embed_single("test2")
        assert embedder._model is loaded  # Same instance
class TestEmbeddingSimilarity:
    """Cosine-similarity properties of generated embeddings."""

    def test_identical_text_similarity(self, embedder):
        """Embedding the same text twice yields near-perfect similarity."""
        from codexlens.semantic.vector_store import _cosine_similarity
        snippet = "def calculate_sum(a, b): return a + b"
        first = embedder.embed_single(snippet)
        second = embedder.embed_single(snippet)
        score = _cosine_similarity(first, second)
        assert score > 0.99, "Identical text should have ~1.0 similarity"

    def test_similar_code_high_similarity(self, embedder):
        """Semantically equivalent functions score well above chance."""
        from codexlens.semantic.vector_store import _cosine_similarity
        adder = embedder.embed_single("def add(a, b): return a + b")
        summer = embedder.embed_single("def sum_numbers(x, y): return x + y")
        score = _cosine_similarity(adder, summer)
        assert score > 0.6, "Similar functions should have high similarity"

    def test_different_code_lower_similarity(self, embedder):
        """Unrelated code scores below near-duplicate code."""
        from codexlens.semantic.vector_store import _cosine_similarity
        adder = embedder.embed_single("def add(a, b): return a + b")
        summer = embedder.embed_single("def sum_numbers(x, y): return x + y")
        auth = embedder.embed_single("class UserAuth: def login(self, user, pwd): pass")
        near = _cosine_similarity(adder, summer)
        far = _cosine_similarity(adder, auth)
        assert near > far, "Similar code should have higher similarity"

    def test_zero_vector_similarity(self):
        """A zero vector is defined to have zero similarity to anything."""
        from codexlens.semantic.vector_store import _cosine_similarity
        zeros = [0.0] * 384
        ones = [1.0] * 384
        assert _cosine_similarity(zeros, ones) == 0.0, "Zero vector should have 0 similarity"
# === VectorStore Tests ===
class TestVectorStoreCRUD:
    """Create/read/delete behaviour of the VectorStore."""

    def test_add_chunk(self, vector_store, embedder):
        """A single embedded chunk is persisted and counted."""
        chunk = SemanticChunk(
            content="def test(): pass",
            metadata={"language": "python"},
        )
        chunk.embedding = embedder.embed_single(chunk.content)
        new_id = vector_store.add_chunk(chunk, "/test/file.py")
        assert new_id > 0
        assert vector_store.count_chunks() == 1

    def test_add_chunk_without_embedding_raises(self, vector_store):
        """Persisting a chunk that has no embedding is rejected."""
        bare = SemanticChunk(content="def test(): pass", metadata={})
        with pytest.raises(ValueError, match="must have embedding"):
            vector_store.add_chunk(bare, "/test/file.py")

    def test_add_chunks_batch(self, vector_store, embedder, sample_code_chunks):
        """Batch insert returns one id per chunk and stores them all."""
        prepared = []
        for item in sample_code_chunks:
            chunk = SemanticChunk(content=item["content"], metadata=item["metadata"])
            chunk.embedding = embedder.embed_single(chunk.content)
            prepared.append(chunk)
        new_ids = vector_store.add_chunks(prepared, "/test/multi.py")
        assert len(new_ids) == len(prepared)
        assert vector_store.count_chunks() == len(prepared)

    def test_add_empty_batch(self, vector_store):
        """Inserting an empty batch yields no ids."""
        assert vector_store.add_chunks([], "/test/empty.py") == []

    def test_delete_file_chunks(self, vector_store, embedder):
        """Deletion removes only the chunks belonging to the given file."""
        # Seed one chunk in each of two distinct files.
        for path, code in (
            ("/test/file1.py", "def a(): pass"),
            ("/test/file2.py", "def b(): pass"),
        ):
            chunk = SemanticChunk(content=code, metadata={})
            chunk.embedding = embedder.embed_single(chunk.content)
            vector_store.add_chunk(chunk, path)
        assert vector_store.count_chunks() == 2
        # Removing file1's chunks must leave file2's intact.
        assert vector_store.delete_file_chunks("/test/file1.py") == 1
        assert vector_store.count_chunks() == 1

    def test_delete_nonexistent_file(self, vector_store):
        """Deleting an unknown file is a no-op reporting zero rows."""
        assert vector_store.delete_file_chunks("/nonexistent/file.py") == 0

    def test_count_chunks_empty(self, vector_store):
        """A freshly created store holds no chunks."""
        assert vector_store.count_chunks() == 0
class TestVectorStoreSearch:
    """Tests for VectorStore search functionality."""

    @staticmethod
    def _index_chunks(store, embedder, chunk_specs, path="/test/file.py"):
        """Embed each (content, metadata) spec and persist it under *path*."""
        for spec in chunk_specs:
            chunk = SemanticChunk(content=spec["content"], metadata=spec["metadata"])
            chunk.embedding = embedder.embed_single(chunk.content)
            store.add_chunk(chunk, path)

    def test_search_similar_basic(self, vector_store, embedder, sample_code_chunks):
        """A natural-language query should surface a relevant chunk first."""
        self._index_chunks(vector_store, embedder, sample_code_chunks)
        query_vec = embedder.embed_single("function to authenticate user login")
        hits = vector_store.search_similar(query_vec, top_k=3)
        assert hits
        assert all(isinstance(hit, SearchResult) for hit in hits)
        # The best match should be authentication-related.
        top = hits[0]
        assert "authenticate" in top.excerpt.lower() or "auth" in top.path.lower()

    def test_search_respects_top_k(self, vector_store, embedder, sample_code_chunks):
        """Result counts must never exceed the requested top_k."""
        self._index_chunks(vector_store, embedder, sample_code_chunks)
        query_vec = embedder.embed_single("code")
        assert len(vector_store.search_similar(query_vec, top_k=2)) <= 2
        assert len(vector_store.search_similar(query_vec, top_k=5)) <= 5

    def test_search_min_score_filtering(self, vector_store, embedder):
        """Raising min_score should never increase the number of hits."""
        chunk = SemanticChunk(
            content="def hello(): print('hello world')",
            metadata={},
        )
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, "/test/hello.py")
        query_vec = embedder.embed_single("database connection pool")
        unfiltered = vector_store.search_similar(query_vec, min_score=0.0)
        filtered = vector_store.search_similar(query_vec, min_score=0.9)
        assert len(unfiltered) >= len(filtered)

    def test_search_returns_sorted_by_score(self, vector_store, embedder, sample_code_chunks):
        """Scores must be non-increasing down the result list."""
        self._index_chunks(vector_store, embedder, sample_code_chunks)
        query_vec = embedder.embed_single("function")
        hits = vector_store.search_similar(query_vec, top_k=5)
        scores = [hit.score for hit in hits]
        assert scores == sorted(scores, reverse=True)

    def test_search_includes_metadata(self, vector_store, embedder):
        """Stored symbol metadata should round-trip into search results."""
        symbol_meta = {
            "symbol_name": "test_function",
            "symbol_kind": "function",
            "start_line": 10,
            "end_line": 15,
        }
        chunk = SemanticChunk(content="def test_function(): pass", metadata=symbol_meta)
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, "/test/func.py")
        hits = vector_store.search_similar(
            embedder.embed_single("test function"), top_k=1
        )
        assert len(hits) == 1
        hit = hits[0]
        assert (hit.symbol_name, hit.symbol_kind) == ("test_function", "function")
        assert (hit.start_line, hit.end_line) == (10, 15)

    def test_search_empty_store_returns_empty(self, vector_store, embedder):
        """Searching an empty store should produce no results."""
        query_vec = embedder.embed_single("anything")
        assert vector_store.search_similar(query_vec) == []

    def test_search_with_return_full_content_false(self, vector_store, embedder):
        """With return_full_content=False only the excerpt is populated."""
        chunk = SemanticChunk(
            content="def long_function(): " + "pass\n" * 100,
            metadata={},
        )
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, "/test/long.py")
        hits = vector_store.search_similar(
            embedder.embed_single("function"), top_k=1, return_full_content=False
        )
        assert len(hits) == 1
        assert hits[0].content is None
        assert hits[0].excerpt is not None
class TestVectorStoreCache:
    """Tests for VectorStore caching behavior."""

    @staticmethod
    def _add_embedded_chunk(store, embedder, source, path):
        """Embed *source* and persist it under *path*."""
        chunk = SemanticChunk(content=source, metadata={})
        chunk.embedding = embedder.embed_single(source)
        store.add_chunk(chunk, path)

    def test_cache_invalidation_on_add(self, vector_store, embedder):
        """Adding a chunk must bump the cache version and drop the matrix."""
        self._add_embedded_chunk(vector_store, embedder, "def a(): pass", "/test/a.py")
        # Run a search so the similarity cache gets built.
        vector_store.search_similar(embedder.embed_single("function"))
        version_before = vector_store._cache_version
        # A second add should invalidate the cached state.
        self._add_embedded_chunk(vector_store, embedder, "def b(): pass", "/test/b.py")
        assert vector_store._cache_version > version_before
        assert vector_store._embedding_matrix is None

    def test_cache_invalidation_on_delete(self, vector_store, embedder):
        """Deleting a file's chunks must bump the cache version."""
        self._add_embedded_chunk(vector_store, embedder, "def a(): pass", "/test/a.py")
        # Run a search so the similarity cache gets built.
        vector_store.search_similar(embedder.embed_single("function"))
        version_before = vector_store._cache_version
        vector_store.delete_file_chunks("/test/a.py")
        assert vector_store._cache_version > version_before

    def test_manual_cache_clear(self, vector_store, embedder):
        """clear_cache() should discard the cached embedding matrix."""
        self._add_embedded_chunk(vector_store, embedder, "def a(): pass", "/test/a.py")
        # Run a search so the similarity cache gets built.
        vector_store.search_similar(embedder.embed_single("function"))
        assert vector_store._embedding_matrix is not None
        vector_store.clear_cache()
        assert vector_store._embedding_matrix is None
# === Semantic Search Accuracy Tests ===
class TestSemanticSearchAccuracy:
    """Tests for semantic search accuracy and relevance."""

    @staticmethod
    def _top_excerpt(store, embedder, chunk_specs, query):
        """Index *chunk_specs*, run *query*, and return the best excerpt lowercased."""
        for spec in chunk_specs:
            chunk = SemanticChunk(content=spec["content"], metadata=spec["metadata"])
            chunk.embedding = embedder.embed_single(chunk.content)
            store.add_chunk(chunk, "/test/file.py")
        hits = store.search_similar(embedder.embed_single(query), top_k=1)
        assert hits
        return hits[0].excerpt.lower()

    def test_auth_query_finds_auth_code(self, vector_store, embedder, sample_code_chunks):
        """An authentication query should rank auth code first."""
        excerpt = self._top_excerpt(
            vector_store, embedder, sample_code_chunks, "user authentication login"
        )
        assert "authenticate" in excerpt

    def test_database_query_finds_db_code(self, vector_store, embedder, sample_code_chunks):
        """A database query should rank database code first."""
        excerpt = self._top_excerpt(
            vector_store, embedder, sample_code_chunks, "database connection execute query"
        )
        assert "database" in excerpt or "connect" in excerpt

    def test_math_query_finds_calculation_code(self, vector_store, embedder, sample_code_chunks):
        """A math query should rank calculation code first."""
        excerpt = self._top_excerpt(
            vector_store, embedder, sample_code_chunks, "sum numbers add calculation"
        )
        assert "sum" in excerpt or "calculate" in excerpt
# === Performance Tests ===
class TestVectorSearchPerformance:
    """Performance tests for vector search."""

    @staticmethod
    def _mean_ms(operation, iterations):
        """Run *operation* *iterations* times and return mean wall time in ms."""
        started = time.perf_counter()
        for _ in range(iterations):
            operation()
        return (time.perf_counter() - started) / iterations * 1000

    def test_embedding_performance(self, embedder):
        """A single embedding should average well under 100ms."""
        text = "def calculate_sum(a, b): return a + b"
        embedder.embed_single(text)  # warm up model/caches first
        avg_ms = self._mean_ms(lambda: embedder.embed_single(text), 10)
        assert avg_ms < 100, f"Single embedding should be <100ms, got {avg_ms:.2f}ms"

    def test_batch_embedding_performance(self, embedder):
        """Batch embedding should amortize to under 20ms per text."""
        texts = [f"def function_{i}(): pass" for i in range(50)]
        embedder.embed(texts[:5])  # warm up
        started = time.perf_counter()
        embedder.embed(texts)
        per_text_ms = (time.perf_counter() - started) * 1000 / len(texts)
        assert per_text_ms < 20, f"Per-text embedding should be <20ms, got {per_text_ms:.2f}ms"

    def test_search_performance_small(self, vector_store, embedder):
        """Search over 100 chunks should average under 50ms."""
        for i in range(100):
            chunk = SemanticChunk(
                content=f"def function_{i}(): return {i}",
                metadata={"index": i},
            )
            chunk.embedding = embedder.embed_single(chunk.content)
            vector_store.add_chunk(chunk, f"/test/file_{i}.py")
        query_vec = embedder.embed_single("function return value")
        vector_store.search_similar(query_vec)  # warm up / build cache
        avg_ms = self._mean_ms(lambda: vector_store.search_similar(query_vec), 10)
        assert avg_ms < 50, f"Search with 100 chunks should be <50ms, got {avg_ms:.2f}ms"

    def test_search_performance_medium(self, vector_store, embedder):
        """Search over 500 chunks should average under 100ms."""
        bulk = []
        for i in range(500):
            chunk = SemanticChunk(
                content=f"def function_{i}(x): return x * {i}",
                metadata={"index": i},
            )
            chunk.embedding = embedder.embed_single(chunk.content)
            bulk.append(chunk)
        vector_store.add_chunks(bulk, "/test/bulk.py")
        query_vec = embedder.embed_single("multiply value")
        vector_store.search_similar(query_vec)  # warm up / build cache
        avg_ms = self._mean_ms(lambda: vector_store.search_similar(query_vec), 5)
        assert avg_ms < 100, f"Search with 500 chunks should be <100ms, got {avg_ms:.2f}ms"
# === Thread Safety Tests ===
class TestThreadSafety:
    """Tests for thread safety."""

    def test_concurrent_searches(self, vector_store, embedder, sample_code_chunks):
        """Parallel searches should all complete without raising."""
        for spec in sample_code_chunks:
            chunk = SemanticChunk(content=spec["content"], metadata=spec["metadata"])
            chunk.embedding = embedder.embed_single(chunk.content)
            vector_store.add_chunk(chunk, "/test/file.py")

        hit_counts = []
        errors = []

        def run_search(query):
            # Collect results/errors instead of raising inside the thread.
            try:
                vec = embedder.embed_single(query)
                hit_counts.append(len(vector_store.search_similar(vec, top_k=3)))
            except Exception as exc:
                errors.append(str(exc))

        queries = ["authentication", "database", "function", "class", "async"]
        workers = [threading.Thread(target=run_search, args=(q,)) for q in queries]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not errors, f"Errors during concurrent search: {errors}"
        assert len(hit_counts) == len(queries)

    def test_concurrent_add_and_search(self, vector_store, embedder):
        """Interleaved adds and searches should not raise."""
        errors = []

        def run_add(idx):
            try:
                chunk = SemanticChunk(
                    content=f"def task_{idx}(): pass",
                    metadata={"idx": idx},
                )
                chunk.embedding = embedder.embed_single(chunk.content)
                vector_store.add_chunk(chunk, f"/test/task_{idx}.py")
            except Exception as exc:
                errors.append(f"Add error: {exc}")

        def run_search():
            try:
                vector_store.search_similar(embedder.embed_single("function task"))
            except Exception as exc:
                errors.append(f"Search error: {exc}")

        workers = []
        for i in range(10):
            workers.append(threading.Thread(target=run_add, args=(i,)))
            workers.append(threading.Thread(target=run_search))
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not errors, f"Errors during concurrent ops: {errors}"
# === Edge Cases ===
class TestEdgeCases:
    """Tests for edge cases."""

    def test_very_short_content(self, vector_store, embedder):
        """A one-character chunk should still be indexed and found."""
        chunk = SemanticChunk(content="x", metadata={})
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, "/test/short.py")
        hits = vector_store.search_similar(embedder.embed_single("x"))
        assert len(hits) == 1

    def test_special_characters_in_path(self, vector_store, embedder):
        """Paths with spaces and punctuation should round-trip intact."""
        unusual_path = "/test/path with spaces/file-name_v2.py"
        chunk = SemanticChunk(content="def test(): pass", metadata={})
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, unusual_path)
        hits = vector_store.search_similar(embedder.embed_single("test function"))
        assert len(hits) == 1
        assert hits[0].path == unusual_path

    def test_json_metadata_special_chars(self, vector_store, embedder):
        """Metadata containing quotes and backslashes should survive storage."""
        tricky_meta = {
            "description": 'Test "quoted" text with \'single\' quotes',
            "path": "C:\\Users\\test\\file.py",
            "tags": ["tag1", "tag2"],
        }
        chunk = SemanticChunk(content="def test(): pass", metadata=tricky_meta)
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, "/test/special.py")
        hits = vector_store.search_similar(embedder.embed_single("test"))
        assert len(hits) == 1
        assert hits[0].metadata["description"] == tricky_meta["description"]

    def test_search_zero_top_k(self, vector_store, embedder):
        """Requesting zero results should return an empty list."""
        chunk = SemanticChunk(content="def test(): pass", metadata={})
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, "/test/file.py")
        assert vector_store.search_similar(embedder.embed_single("test"), top_k=0) == []

    def test_search_very_high_min_score(self, vector_store, embedder):
        """An extreme min_score should filter out unrelated matches entirely."""
        chunk = SemanticChunk(content="def hello(): print('world')", metadata={})
        chunk.embedding = embedder.embed_single(chunk.content)
        vector_store.add_chunk(chunk, "/test/hello.py")
        # The query is unrelated to the stored chunk, so a 0.99 threshold
        # should exclude everything.
        hits = vector_store.search_similar(
            embedder.embed_single("database connection"), min_score=0.99
        )
        assert len(hits) == 0
# === Availability Check Tests ===
class TestAvailabilityCheck:
    """Tests for semantic availability checking."""

    def test_check_semantic_available(self):
        """check_semantic_available() should report availability with no error."""
        is_available, err = check_semantic_available()
        assert is_available is True
        assert err is None

    def test_semantic_available_flag(self):
        """SEMANTIC_AVAILABLE flag is True when dependencies are installed."""
        assert SEMANTIC_AVAILABLE is True