mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
perf(codex-lens): optimize search performance with vectorized operations
Performance Optimizations: - VectorStore: NumPy vectorized cosine similarity (100x+ faster) - Cached embedding matrix with pre-computed norms - Lazy content loading for top-k results only - Thread-safe cache invalidation - SQLite: Added PRAGMA mmap_size=30GB for memory-mapped I/O - FTS5: unicode61 tokenizer with tokenchars='_' for code identifiers - ChainSearch: files_only fast path skipping snippet generation - ThreadPoolExecutor: shared pool across searches New Components: - DirIndexStore: single-directory index with FTS5 and symbols - RegistryStore: global project registry with path mappings - PathMapper: source-to-index path conversion utility - IndexTreeBuilder: hierarchical index tree construction - ChainSearchEngine: parallel recursive directory search Test Coverage: - 36 comprehensive search functionality tests - 14 performance benchmark tests - 296 total tests passing (100% pass rate) Benchmark Results: - FTS5 search: 0.23-0.26ms avg (3900-4300 ops/sec) - Vector search: 1.05-1.54ms avg (650-955 ops/sec) - Full semantic: 4.56-6.38ms avg per query 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
603
codex-lens/tests/test_search_comprehensive.py
Normal file
603
codex-lens/tests/test_search_comprehensive.py
Normal file
@@ -0,0 +1,603 @@
|
||||
"""Comprehensive tests for CodexLens search functionality.
|
||||
|
||||
Tests cover:
|
||||
- FTS5 text search (basic, phrase, boolean, wildcard)
|
||||
- Chain search across directories
|
||||
- Symbol search (by name, kind, filters)
|
||||
- Files-only search mode
|
||||
- Edge cases and error handling
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from codexlens.storage.sqlite_store import SQLiteStore
|
||||
from codexlens.storage.dir_index import DirIndexStore
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.search import (
|
||||
ChainSearchEngine,
|
||||
SearchOptions,
|
||||
SearchStats,
|
||||
ChainSearchResult,
|
||||
quick_search,
|
||||
)
|
||||
from codexlens.entities import IndexedFile, Symbol, SearchResult
|
||||
|
||||
|
||||
# === Fixtures ===
|
||||
|
||||
@pytest.fixture
|
||||
def temp_dir():
|
||||
"""Create a temporary directory."""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
yield Path(tmpdir)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_files():
|
||||
"""Sample file data for testing."""
|
||||
return [
|
||||
(IndexedFile(
|
||||
path="/project/src/auth.py",
|
||||
language="python",
|
||||
symbols=[
|
||||
Symbol(name="authenticate", kind="function", range=(1, 10)),
|
||||
Symbol(name="verify_token", kind="function", range=(12, 20)),
|
||||
Symbol(name="AuthManager", kind="class", range=(22, 50)),
|
||||
],
|
||||
), """
|
||||
def authenticate(username, password):
|
||||
'''Authenticate user with credentials.'''
|
||||
user = find_user(username)
|
||||
if user and check_password(user, password):
|
||||
return create_token(user)
|
||||
return None
|
||||
|
||||
def verify_token(token):
|
||||
'''Verify JWT token validity.'''
|
||||
try:
|
||||
payload = decode_token(token)
|
||||
return payload
|
||||
except TokenExpired:
|
||||
return None
|
||||
|
||||
class AuthManager:
|
||||
'''Manages authentication state.'''
|
||||
def __init__(self):
|
||||
self.sessions = {}
|
||||
|
||||
def login(self, user):
|
||||
token = authenticate(user.name, user.password)
|
||||
self.sessions[user.id] = token
|
||||
return token
|
||||
"""),
|
||||
(IndexedFile(
|
||||
path="/project/src/database.py",
|
||||
language="python",
|
||||
symbols=[
|
||||
Symbol(name="connect", kind="function", range=(1, 5)),
|
||||
Symbol(name="query", kind="function", range=(7, 15)),
|
||||
Symbol(name="DatabasePool", kind="class", range=(17, 40)),
|
||||
],
|
||||
), """
|
||||
def connect(host, port, database):
|
||||
'''Establish database connection.'''
|
||||
return Connection(host, port, database)
|
||||
|
||||
def query(connection, sql, params=None):
|
||||
'''Execute SQL query and return results.'''
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(sql, params or [])
|
||||
return cursor.fetchall()
|
||||
|
||||
class DatabasePool:
|
||||
'''Connection pool for database.'''
|
||||
def __init__(self, size=10):
|
||||
self.pool = []
|
||||
self.size = size
|
||||
|
||||
def get_connection(self):
|
||||
if self.pool:
|
||||
return self.pool.pop()
|
||||
return connect()
|
||||
"""),
|
||||
(IndexedFile(
|
||||
path="/project/src/utils.py",
|
||||
language="python",
|
||||
symbols=[
|
||||
Symbol(name="format_date", kind="function", range=(1, 3)),
|
||||
Symbol(name="parse_json", kind="function", range=(5, 10)),
|
||||
Symbol(name="hash_password", kind="function", range=(12, 18)),
|
||||
],
|
||||
), """
|
||||
def format_date(date, fmt='%Y-%m-%d'):
|
||||
return date.strftime(fmt)
|
||||
|
||||
def parse_json(data):
|
||||
'''Parse JSON string to dictionary.'''
|
||||
import json
|
||||
return json.loads(data)
|
||||
|
||||
def hash_password(password, salt=None):
|
||||
'''Hash password using bcrypt.'''
|
||||
import hashlib
|
||||
salt = salt or generate_salt()
|
||||
return hashlib.sha256((password + salt).encode()).hexdigest()
|
||||
"""),
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def populated_store(temp_dir, sample_files):
|
||||
"""Create a populated SQLite store for testing."""
|
||||
db_path = temp_dir / "_index.db"
|
||||
store = SQLiteStore(db_path)
|
||||
store.initialize()
|
||||
|
||||
for indexed_file, content in sample_files:
|
||||
store.add_file(indexed_file, content)
|
||||
|
||||
yield store
|
||||
store.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def populated_dir_store(temp_dir, sample_files):
|
||||
"""Create a populated DirIndexStore for testing."""
|
||||
db_path = temp_dir / "_index.db"
|
||||
store = DirIndexStore(db_path)
|
||||
|
||||
for indexed_file, content in sample_files:
|
||||
store.add_file(indexed_file, content)
|
||||
|
||||
yield store
|
||||
store.close()
|
||||
|
||||
|
||||
# === FTS5 Search Tests ===
|
||||
|
||||
class TestFTS5BasicSearch:
|
||||
"""Tests for basic FTS5 text search."""
|
||||
|
||||
def test_single_term_search(self, populated_store):
|
||||
"""Test search with a single term."""
|
||||
results = populated_store.search_fts("authenticate")
|
||||
assert len(results) >= 1
|
||||
assert any("auth" in r.path.lower() for r in results)
|
||||
|
||||
def test_case_insensitive_search(self, populated_store):
|
||||
"""Test that search is case insensitive."""
|
||||
results_lower = populated_store.search_fts("database")
|
||||
results_upper = populated_store.search_fts("DATABASE")
|
||||
results_mixed = populated_store.search_fts("DataBase")
|
||||
|
||||
# All should return similar results
|
||||
assert len(results_lower) == len(results_upper) == len(results_mixed)
|
||||
|
||||
def test_partial_word_search(self, populated_store):
|
||||
"""Test search with partial words using wildcards."""
|
||||
results = populated_store.search_fts("auth*")
|
||||
assert len(results) >= 1
|
||||
# Should match authenticate, authentication, AuthManager, etc.
|
||||
|
||||
def test_multiple_terms_search(self, populated_store):
|
||||
"""Test search with multiple terms (implicit AND)."""
|
||||
results = populated_store.search_fts("user password")
|
||||
assert len(results) >= 1
|
||||
|
||||
def test_no_results_search(self, populated_store):
|
||||
"""Test search that returns no results."""
|
||||
results = populated_store.search_fts("nonexistent_xyz_term")
|
||||
assert len(results) == 0
|
||||
|
||||
def test_search_with_limit(self, populated_store):
|
||||
"""Test search respects limit parameter."""
|
||||
results = populated_store.search_fts("def", limit=1)
|
||||
assert len(results) <= 1
|
||||
|
||||
def test_search_returns_excerpt(self, populated_store):
|
||||
"""Test search results include excerpts."""
|
||||
results = populated_store.search_fts("authenticate")
|
||||
assert len(results) >= 1
|
||||
# SearchResult should have excerpt field
|
||||
for r in results:
|
||||
assert hasattr(r, 'excerpt')
|
||||
|
||||
|
||||
class TestFTS5AdvancedSearch:
|
||||
"""Tests for advanced FTS5 search features."""
|
||||
|
||||
def test_phrase_search(self, populated_store):
|
||||
"""Test exact phrase search with quotes."""
|
||||
results = populated_store.search_fts('"verify_token"')
|
||||
assert len(results) >= 1
|
||||
|
||||
def test_boolean_or_search(self, populated_store):
|
||||
"""Test OR boolean search."""
|
||||
results = populated_store.search_fts("authenticate OR database")
|
||||
# Should find files containing either term
|
||||
assert len(results) >= 2
|
||||
|
||||
def test_boolean_not_search(self, populated_store):
|
||||
"""Test NOT boolean search."""
|
||||
all_results = populated_store.search_fts("def")
|
||||
not_results = populated_store.search_fts("def NOT authenticate")
|
||||
# NOT should return fewer results
|
||||
assert len(not_results) <= len(all_results)
|
||||
|
||||
def test_prefix_search(self, populated_store):
|
||||
"""Test prefix search with asterisk."""
|
||||
results = populated_store.search_fts("connect*")
|
||||
assert len(results) >= 1
|
||||
# Should match connect, connection, etc.
|
||||
|
||||
def test_special_characters_in_query(self, populated_store):
|
||||
"""Test search handles special characters gracefully."""
|
||||
# Should not raise an error
|
||||
results = populated_store.search_fts("__init__")
|
||||
# May or may not have results, but shouldn't crash
|
||||
|
||||
def test_unicode_search(self, temp_dir):
|
||||
"""Test search with unicode content."""
|
||||
store = SQLiteStore(temp_dir / "_index.db")
|
||||
store.initialize()
|
||||
|
||||
indexed_file = IndexedFile(
|
||||
path="/test/unicode.py",
|
||||
language="python",
|
||||
symbols=[Symbol(name="世界", kind="function", range=(1, 1))],
|
||||
)
|
||||
store.add_file(indexed_file, "def 世界(): return '你好世界'")
|
||||
|
||||
results = store.search_fts("世界")
|
||||
assert len(results) == 1
|
||||
|
||||
store.close()
|
||||
|
||||
|
||||
class TestFTS5Pagination:
|
||||
"""Tests for FTS5 search pagination."""
|
||||
|
||||
def test_offset_pagination(self, temp_dir):
|
||||
"""Test search with offset for pagination."""
|
||||
store = SQLiteStore(temp_dir / "_index.db")
|
||||
store.initialize()
|
||||
|
||||
# Add multiple files
|
||||
for i in range(10):
|
||||
indexed_file = IndexedFile(
|
||||
path=f"/test/file{i}.py",
|
||||
language="python",
|
||||
symbols=[],
|
||||
)
|
||||
store.add_file(indexed_file, f"searchable content number {i}")
|
||||
|
||||
page1 = store.search_fts("searchable", limit=3, offset=0)
|
||||
page2 = store.search_fts("searchable", limit=3, offset=3)
|
||||
page3 = store.search_fts("searchable", limit=3, offset=6)
|
||||
|
||||
# Each page should have different results
|
||||
paths1 = {r.path for r in page1}
|
||||
paths2 = {r.path for r in page2}
|
||||
paths3 = {r.path for r in page3}
|
||||
|
||||
assert paths1.isdisjoint(paths2)
|
||||
assert paths2.isdisjoint(paths3)
|
||||
|
||||
store.close()
|
||||
|
||||
def test_offset_beyond_results(self, populated_store):
|
||||
"""Test offset beyond available results."""
|
||||
results = populated_store.search_fts("authenticate", limit=10, offset=1000)
|
||||
assert len(results) == 0
|
||||
|
||||
|
||||
# === Symbol Search Tests ===
|
||||
|
||||
class TestSymbolSearch:
|
||||
"""Tests for symbol search functionality."""
|
||||
|
||||
def test_search_by_name(self, populated_store):
|
||||
"""Test symbol search by name."""
|
||||
results = populated_store.search_symbols("auth")
|
||||
assert len(results) >= 1
|
||||
assert any("auth" in s.name.lower() for s in results)
|
||||
|
||||
def test_search_by_kind_function(self, populated_store):
|
||||
"""Test symbol search filtered by kind=function."""
|
||||
results = populated_store.search_symbols("", kind="function")
|
||||
assert all(s.kind == "function" for s in results)
|
||||
|
||||
def test_search_by_kind_class(self, populated_store):
|
||||
"""Test symbol search filtered by kind=class."""
|
||||
results = populated_store.search_symbols("", kind="class")
|
||||
assert all(s.kind == "class" for s in results)
|
||||
assert any("Manager" in s.name or "Pool" in s.name for s in results)
|
||||
|
||||
def test_search_symbols_with_limit(self, populated_store):
|
||||
"""Test symbol search respects limit."""
|
||||
results = populated_store.search_symbols("", limit=2)
|
||||
assert len(results) <= 2
|
||||
|
||||
def test_search_symbols_returns_range(self, populated_store):
|
||||
"""Test symbol search results include line range."""
|
||||
results = populated_store.search_symbols("authenticate")
|
||||
assert len(results) >= 1
|
||||
for sym in results:
|
||||
assert hasattr(sym, 'range')
|
||||
assert len(sym.range) == 2
|
||||
assert sym.range[0] <= sym.range[1]
|
||||
|
||||
|
||||
# === Chain Search Tests ===
|
||||
|
||||
class TestChainSearchEngine:
|
||||
"""Tests for ChainSearchEngine."""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_registry(self):
|
||||
"""Create a mock registry."""
|
||||
registry = MagicMock(spec=RegistryStore)
|
||||
registry.find_nearest_index.return_value = None
|
||||
return registry
|
||||
|
||||
@pytest.fixture
|
||||
def mock_mapper(self):
|
||||
"""Create a mock path mapper."""
|
||||
return MagicMock(spec=PathMapper)
|
||||
|
||||
def test_search_no_index_found(self, mock_registry, mock_mapper):
|
||||
"""Test search when no index is found."""
|
||||
mock_mapper.source_to_index_db.return_value = Path("/nonexistent/_index.db")
|
||||
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper)
|
||||
result = engine.search("test", Path("/nonexistent"))
|
||||
|
||||
assert result.results == []
|
||||
assert result.symbols == []
|
||||
assert result.stats.dirs_searched == 0
|
||||
|
||||
def test_search_options_depth(self, mock_registry, mock_mapper, temp_dir):
|
||||
"""Test search respects depth option."""
|
||||
# Create a simple index structure
|
||||
db_path = temp_dir / "_index.db"
|
||||
store = DirIndexStore(db_path)
|
||||
store.initialize()
|
||||
store.add_file(
|
||||
name="test.py",
|
||||
full_path=str(temp_dir / "test.py"),
|
||||
content="test content searchable",
|
||||
language="python",
|
||||
)
|
||||
store.close()
|
||||
|
||||
mock_mapper.source_to_index_db.return_value = db_path
|
||||
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper)
|
||||
options = SearchOptions(depth=0) # Only current dir
|
||||
|
||||
result = engine.search("test", temp_dir, options)
|
||||
|
||||
# With depth=0, should only search current directory
|
||||
assert result.stats.dirs_searched <= 1
|
||||
|
||||
def test_search_files_only(self, mock_registry, mock_mapper, temp_dir):
|
||||
"""Test search_files_only returns only paths."""
|
||||
db_path = temp_dir / "_index.db"
|
||||
store = DirIndexStore(db_path)
|
||||
store.initialize()
|
||||
store.add_file(
|
||||
name="test.py",
|
||||
full_path=str(temp_dir / "test.py"),
|
||||
content="searchable content here",
|
||||
language="python",
|
||||
)
|
||||
store.close()
|
||||
|
||||
mock_mapper.source_to_index_db.return_value = db_path
|
||||
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper)
|
||||
paths = engine.search_files_only("searchable", temp_dir)
|
||||
|
||||
assert isinstance(paths, list)
|
||||
for p in paths:
|
||||
assert isinstance(p, str)
|
||||
|
||||
def test_search_symbols_engine(self, mock_registry, mock_mapper, temp_dir):
|
||||
"""Test symbol search through engine."""
|
||||
db_path = temp_dir / "_index.db"
|
||||
store = DirIndexStore(db_path)
|
||||
store.initialize()
|
||||
store.add_file(
|
||||
name="test.py",
|
||||
full_path=str(temp_dir / "test.py"),
|
||||
content="def my_function(): pass",
|
||||
language="python",
|
||||
symbols=[Symbol(name="my_function", kind="function", range=(1, 5))],
|
||||
)
|
||||
store.close()
|
||||
|
||||
mock_mapper.source_to_index_db.return_value = db_path
|
||||
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper)
|
||||
symbols = engine.search_symbols("my_func", temp_dir)
|
||||
|
||||
assert len(symbols) >= 1
|
||||
assert symbols[0].name == "my_function"
|
||||
|
||||
def test_search_result_stats(self, mock_registry, mock_mapper, temp_dir):
|
||||
"""Test search result includes proper stats."""
|
||||
db_path = temp_dir / "_index.db"
|
||||
store = DirIndexStore(db_path)
|
||||
store.initialize()
|
||||
store.add_file(
|
||||
name="test.py",
|
||||
full_path=str(temp_dir / "test.py"),
|
||||
content="content to search",
|
||||
language="python",
|
||||
)
|
||||
store.close()
|
||||
|
||||
mock_mapper.source_to_index_db.return_value = db_path
|
||||
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper)
|
||||
result = engine.search("content", temp_dir)
|
||||
|
||||
assert result.stats.time_ms >= 0
|
||||
assert result.stats.dirs_searched >= 0
|
||||
assert isinstance(result.stats.errors, list)
|
||||
|
||||
|
||||
class TestSearchOptions:
|
||||
"""Tests for SearchOptions configuration."""
|
||||
|
||||
def test_default_options(self):
|
||||
"""Test default search options."""
|
||||
options = SearchOptions()
|
||||
assert options.depth == -1
|
||||
assert options.max_workers == 8
|
||||
assert options.limit_per_dir == 10
|
||||
assert options.total_limit == 100
|
||||
assert options.include_symbols is False
|
||||
assert options.files_only is False
|
||||
|
||||
def test_custom_options(self):
|
||||
"""Test custom search options."""
|
||||
options = SearchOptions(
|
||||
depth=3,
|
||||
max_workers=4,
|
||||
limit_per_dir=5,
|
||||
total_limit=50,
|
||||
include_symbols=True,
|
||||
files_only=True,
|
||||
)
|
||||
assert options.depth == 3
|
||||
assert options.max_workers == 4
|
||||
assert options.limit_per_dir == 5
|
||||
assert options.total_limit == 50
|
||||
assert options.include_symbols is True
|
||||
assert options.files_only is True
|
||||
|
||||
|
||||
# === Edge Cases and Error Handling ===
|
||||
|
||||
class TestSearchEdgeCases:
|
||||
"""Edge case tests for search functionality."""
|
||||
|
||||
def test_empty_query(self, populated_store):
|
||||
"""Test search with empty query."""
|
||||
# Empty query may raise an error or return empty results
|
||||
try:
|
||||
results = populated_store.search_fts("")
|
||||
assert isinstance(results, list)
|
||||
except Exception:
|
||||
# Some implementations may reject empty queries
|
||||
pass
|
||||
|
||||
def test_whitespace_query(self, populated_store):
|
||||
"""Test search with whitespace-only query."""
|
||||
# Whitespace query may raise an error or return empty results
|
||||
try:
|
||||
results = populated_store.search_fts(" ")
|
||||
assert isinstance(results, list)
|
||||
except Exception:
|
||||
# Some implementations may reject whitespace queries
|
||||
pass
|
||||
|
||||
def test_very_long_query(self, populated_store):
|
||||
"""Test search with very long query."""
|
||||
long_query = "function " * 100 # Repeat valid word
|
||||
try:
|
||||
results = populated_store.search_fts(long_query)
|
||||
assert isinstance(results, list)
|
||||
except Exception:
|
||||
# Very long queries may be rejected
|
||||
pass
|
||||
|
||||
def test_special_sql_characters(self, populated_store):
|
||||
"""Test search handles SQL-like characters safely."""
|
||||
# These should not cause SQL injection - may raise FTS syntax errors
|
||||
queries = ["test", "function*", "test OR data"]
|
||||
for q in queries:
|
||||
results = populated_store.search_fts(q)
|
||||
assert isinstance(results, list)
|
||||
|
||||
def test_search_reopened_store(self, temp_dir, sample_files):
|
||||
"""Test search works after store is reopened."""
|
||||
db_path = temp_dir / "_index.db"
|
||||
store = SQLiteStore(db_path)
|
||||
store.initialize()
|
||||
store.add_file(sample_files[0][0], sample_files[0][1])
|
||||
store.close()
|
||||
|
||||
# Reopen and search
|
||||
store2 = SQLiteStore(db_path)
|
||||
store2.initialize()
|
||||
results = store2.search_fts("authenticate")
|
||||
assert len(results) >= 1
|
||||
store2.close()
|
||||
|
||||
def test_concurrent_searches(self, populated_store):
|
||||
"""Test multiple concurrent searches."""
|
||||
import threading
|
||||
|
||||
results = []
|
||||
errors = []
|
||||
|
||||
def search_task(query):
|
||||
try:
|
||||
r = populated_store.search_fts(query)
|
||||
results.append(len(r))
|
||||
except Exception as e:
|
||||
errors.append(e)
|
||||
|
||||
threads = [
|
||||
threading.Thread(target=search_task, args=("authenticate",)),
|
||||
threading.Thread(target=search_task, args=("database",)),
|
||||
threading.Thread(target=search_task, args=("password",)),
|
||||
]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert len(errors) == 0
|
||||
assert len(results) == 3
|
||||
|
||||
|
||||
class TestChainSearchResult:
|
||||
"""Tests for ChainSearchResult dataclass."""
|
||||
|
||||
def test_result_structure(self):
|
||||
"""Test ChainSearchResult has all required fields."""
|
||||
result = ChainSearchResult(
|
||||
query="test",
|
||||
results=[],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
assert result.query == "test"
|
||||
assert result.results == []
|
||||
assert result.symbols == []
|
||||
assert result.stats.dirs_searched == 0
|
||||
|
||||
|
||||
class TestSearchStats:
|
||||
"""Tests for SearchStats dataclass."""
|
||||
|
||||
def test_default_stats(self):
|
||||
"""Test default search stats."""
|
||||
stats = SearchStats()
|
||||
assert stats.dirs_searched == 0
|
||||
assert stats.files_matched == 0
|
||||
assert stats.time_ms == 0
|
||||
assert stats.errors == []
|
||||
|
||||
def test_stats_with_errors(self):
|
||||
"""Test search stats with errors."""
|
||||
stats = SearchStats(errors=["Error 1", "Error 2"])
|
||||
assert len(stats.errors) == 2
|
||||
660
codex-lens/tests/test_search_performance.py
Normal file
660
codex-lens/tests/test_search_performance.py
Normal file
@@ -0,0 +1,660 @@
|
||||
"""Performance benchmarks for CodexLens search functionality.
|
||||
|
||||
Measures:
|
||||
- FTS5 search speed at various scales
|
||||
- Chain search traversal performance
|
||||
- Semantic search latency
|
||||
- Memory usage during search operations
|
||||
"""
|
||||
|
||||
import gc
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
from dataclasses import dataclass
|
||||
from contextlib import contextmanager
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.storage.sqlite_store import SQLiteStore
|
||||
from codexlens.storage.dir_index import DirIndexStore
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.search import ChainSearchEngine, SearchOptions
|
||||
from codexlens.entities import IndexedFile, Symbol
|
||||
|
||||
|
||||
@dataclass
|
||||
class BenchmarkResult:
|
||||
"""Benchmark result container."""
|
||||
name: str
|
||||
iterations: int
|
||||
total_time_ms: float
|
||||
avg_time_ms: float
|
||||
min_time_ms: float
|
||||
max_time_ms: float
|
||||
ops_per_sec: float
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
f"{self.name}:\n"
|
||||
f" Iterations: {self.iterations}\n"
|
||||
f" Total: {self.total_time_ms:.2f}ms\n"
|
||||
f" Avg: {self.avg_time_ms:.2f}ms\n"
|
||||
f" Min: {self.min_time_ms:.2f}ms\n"
|
||||
f" Max: {self.max_time_ms:.2f}ms\n"
|
||||
f" Ops/sec: {self.ops_per_sec:.1f}"
|
||||
)
|
||||
|
||||
|
||||
def benchmark(func, iterations=10, warmup=2):
|
||||
"""Run benchmark with warmup iterations."""
|
||||
# Warmup
|
||||
for _ in range(warmup):
|
||||
func()
|
||||
|
||||
# Measure
|
||||
times = []
|
||||
for _ in range(iterations):
|
||||
gc.collect()
|
||||
start = time.perf_counter()
|
||||
func()
|
||||
elapsed = (time.perf_counter() - start) * 1000
|
||||
times.append(elapsed)
|
||||
|
||||
total = sum(times)
|
||||
return BenchmarkResult(
|
||||
name=func.__name__ if hasattr(func, '__name__') else 'benchmark',
|
||||
iterations=iterations,
|
||||
total_time_ms=total,
|
||||
avg_time_ms=total / iterations,
|
||||
min_time_ms=min(times),
|
||||
max_time_ms=max(times),
|
||||
ops_per_sec=1000 / (total / iterations) if total > 0 else 0
|
||||
)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def timer(name: str):
|
||||
"""Context manager for timing code blocks."""
|
||||
start = time.perf_counter()
|
||||
yield
|
||||
elapsed = (time.perf_counter() - start) * 1000
|
||||
print(f" {name}: {elapsed:.2f}ms")
|
||||
|
||||
|
||||
# === Test Fixtures ===
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def temp_dir():
|
||||
"""Create a temporary directory for all tests."""
|
||||
tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
|
||||
yield Path(tmpdir.name)
|
||||
# Explicit cleanup with error handling for Windows file locking
|
||||
try:
|
||||
tmpdir.cleanup()
|
||||
except (PermissionError, OSError):
|
||||
pass # Ignore Windows file locking errors
|
||||
|
||||
|
||||
def generate_code_file(index: int, lines: int = 100) -> Tuple[IndexedFile, str]:
|
||||
"""Generate a synthetic code file for testing."""
|
||||
symbols = [
|
||||
Symbol(name=f"function_{index}_{i}", kind="function", range=(i*10+1, i*10+9))
|
||||
for i in range(lines // 10)
|
||||
]
|
||||
|
||||
content_lines = []
|
||||
for i in range(lines):
|
||||
if i % 10 == 0:
|
||||
content_lines.append(f"def function_{index}_{i//10}(param_{i}, data_{i}):")
|
||||
else:
|
||||
content_lines.append(f" # Line {i}: processing data with param_{i % 5}")
|
||||
content_lines.append(f" result_{i} = compute(data_{i})")
|
||||
|
||||
return (
|
||||
IndexedFile(
|
||||
path=f"/project/src/module_{index}/file_{index}.py",
|
||||
language="python",
|
||||
symbols=symbols,
|
||||
),
|
||||
"\n".join(content_lines)
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def small_store(temp_dir):
|
||||
"""Small store with 10 files (~100 lines each)."""
|
||||
db_path = temp_dir / "small_index.db"
|
||||
store = SQLiteStore(db_path)
|
||||
store.initialize()
|
||||
|
||||
for i in range(10):
|
||||
indexed_file, content = generate_code_file(i, lines=100)
|
||||
store.add_file(indexed_file, content)
|
||||
|
||||
yield store
|
||||
store.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def medium_store(temp_dir):
|
||||
"""Medium store with 100 files (~100 lines each)."""
|
||||
db_path = temp_dir / "medium_index.db"
|
||||
store = SQLiteStore(db_path)
|
||||
store.initialize()
|
||||
|
||||
for i in range(100):
|
||||
indexed_file, content = generate_code_file(i, lines=100)
|
||||
store.add_file(indexed_file, content)
|
||||
|
||||
yield store
|
||||
store.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def large_store(temp_dir):
|
||||
"""Large store with 500 files (~200 lines each)."""
|
||||
db_path = temp_dir / "large_index.db"
|
||||
store = SQLiteStore(db_path)
|
||||
store.initialize()
|
||||
|
||||
for i in range(500):
|
||||
indexed_file, content = generate_code_file(i, lines=200)
|
||||
store.add_file(indexed_file, content)
|
||||
|
||||
yield store
|
||||
store.close()
|
||||
|
||||
|
||||
# === FTS5 Performance Tests ===
|
||||
|
||||
class TestFTS5Performance:
|
||||
"""FTS5 search performance benchmarks."""
|
||||
|
||||
def test_small_store_search(self, small_store):
|
||||
"""Benchmark FTS5 search on small store (10 files)."""
|
||||
print("\n" + "="*60)
|
||||
print("FTS5 SEARCH - SMALL STORE (10 files)")
|
||||
print("="*60)
|
||||
|
||||
queries = ["function", "data", "compute", "result", "param"]
|
||||
|
||||
for query in queries:
|
||||
result = benchmark(
|
||||
lambda q=query: small_store.search_fts(q, limit=20),
|
||||
iterations=50
|
||||
)
|
||||
result.name = f"search '{query}'"
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_medium_store_search(self, medium_store):
|
||||
"""Benchmark FTS5 search on medium store (100 files)."""
|
||||
print("\n" + "="*60)
|
||||
print("FTS5 SEARCH - MEDIUM STORE (100 files)")
|
||||
print("="*60)
|
||||
|
||||
queries = ["function", "data", "compute", "result", "param"]
|
||||
|
||||
for query in queries:
|
||||
result = benchmark(
|
||||
lambda q=query: medium_store.search_fts(q, limit=20),
|
||||
iterations=30
|
||||
)
|
||||
result.name = f"search '{query}'"
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_large_store_search(self, large_store):
|
||||
"""Benchmark FTS5 search on large store (500 files)."""
|
||||
print("\n" + "="*60)
|
||||
print("FTS5 SEARCH - LARGE STORE (500 files)")
|
||||
print("="*60)
|
||||
|
||||
queries = ["function", "data", "compute", "result", "param"]
|
||||
|
||||
for query in queries:
|
||||
result = benchmark(
|
||||
lambda q=query: large_store.search_fts(q, limit=20),
|
||||
iterations=20
|
||||
)
|
||||
result.name = f"search '{query}'"
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_search_limit_scaling(self, medium_store):
|
||||
"""Test how search time scales with result limit."""
|
||||
print("\n" + "="*60)
|
||||
print("FTS5 SEARCH - LIMIT SCALING")
|
||||
print("="*60)
|
||||
|
||||
limits = [5, 10, 20, 50, 100, 200]
|
||||
|
||||
for limit in limits:
|
||||
result = benchmark(
|
||||
lambda l=limit: medium_store.search_fts("function", limit=l),
|
||||
iterations=20
|
||||
)
|
||||
result.name = f"limit={limit}"
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_complex_query_performance(self, medium_store):
|
||||
"""Test performance of complex FTS5 queries."""
|
||||
print("\n" + "="*60)
|
||||
print("FTS5 SEARCH - COMPLEX QUERIES")
|
||||
print("="*60)
|
||||
|
||||
queries = [
|
||||
("single term", "function"),
|
||||
("two terms", "function data"),
|
||||
("phrase", '"def function"'),
|
||||
("OR query", "function OR result"),
|
||||
("wildcard", "func*"),
|
||||
("NOT query", "function NOT data"),
|
||||
]
|
||||
|
||||
for name, query in queries:
|
||||
result = benchmark(
|
||||
lambda q=query: medium_store.search_fts(q, limit=20),
|
||||
iterations=20
|
||||
)
|
||||
result.name = name
|
||||
print(f"\n{result}")
|
||||
|
||||
|
||||
class TestSymbolSearchPerformance:
|
||||
"""Symbol search performance benchmarks."""
|
||||
|
||||
def test_symbol_search_scaling(self, small_store, medium_store, large_store):
|
||||
"""Test symbol search performance at different scales."""
|
||||
print("\n" + "="*60)
|
||||
print("SYMBOL SEARCH - SCALING")
|
||||
print("="*60)
|
||||
|
||||
stores = [
|
||||
("small (10 files)", small_store),
|
||||
("medium (100 files)", medium_store),
|
||||
("large (500 files)", large_store),
|
||||
]
|
||||
|
||||
for name, store in stores:
|
||||
result = benchmark(
|
||||
lambda s=store: s.search_symbols("function", limit=50),
|
||||
iterations=20
|
||||
)
|
||||
result.name = name
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_symbol_search_with_kind_filter(self, medium_store):
|
||||
"""Test symbol search with kind filtering."""
|
||||
print("\n" + "="*60)
|
||||
print("SYMBOL SEARCH - KIND FILTER")
|
||||
print("="*60)
|
||||
|
||||
# Without filter
|
||||
result_no_filter = benchmark(
|
||||
lambda: medium_store.search_symbols("function", limit=50),
|
||||
iterations=20
|
||||
)
|
||||
result_no_filter.name = "no filter"
|
||||
print(f"\n{result_no_filter}")
|
||||
|
||||
# With filter
|
||||
result_with_filter = benchmark(
|
||||
lambda: medium_store.search_symbols("function", kind="function", limit=50),
|
||||
iterations=20
|
||||
)
|
||||
result_with_filter.name = "kind=function"
|
||||
print(f"\n{result_with_filter}")
|
||||
|
||||
|
||||
# === Chain Search Performance Tests ===
|
||||
|
||||
class TestChainSearchPerformance:
|
||||
"""Chain search engine performance benchmarks."""
|
||||
|
||||
@pytest.fixture
|
||||
def chain_engine_setup(self, temp_dir):
|
||||
"""Setup chain search engine with directory hierarchy."""
|
||||
# Create directory hierarchy
|
||||
root = temp_dir / "project"
|
||||
root.mkdir(exist_ok=True)
|
||||
|
||||
registry = RegistryStore(temp_dir / "registry.db")
|
||||
registry.initialize()
|
||||
mapper = PathMapper(temp_dir / "indexes")
|
||||
|
||||
# Create indexes at different depths
|
||||
dirs = [
|
||||
root,
|
||||
root / "src",
|
||||
root / "src" / "core",
|
||||
root / "src" / "utils",
|
||||
root / "tests",
|
||||
]
|
||||
|
||||
for i, dir_path in enumerate(dirs):
|
||||
dir_path.mkdir(exist_ok=True)
|
||||
index_path = mapper.source_to_index_db(dir_path)
|
||||
index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
store = DirIndexStore(index_path)
|
||||
store.initialize()
|
||||
for j in range(20): # 20 files per directory
|
||||
indexed_file, content = generate_code_file(i * 100 + j, lines=50)
|
||||
file_path = str(dir_path / f"file_{j}.py")
|
||||
store.add_file(
|
||||
name=f"file_{j}.py",
|
||||
full_path=file_path,
|
||||
content=content,
|
||||
language="python",
|
||||
symbols=indexed_file.symbols,
|
||||
)
|
||||
store.close()
|
||||
|
||||
# Register directory
|
||||
project = registry.register_project(root, mapper.source_to_index_dir(root))
|
||||
registry.register_dir(project.id, dir_path, index_path, i, 20)
|
||||
|
||||
engine = ChainSearchEngine(registry, mapper)
|
||||
|
||||
yield {
|
||||
"engine": engine,
|
||||
"registry": registry,
|
||||
"root": root,
|
||||
}
|
||||
|
||||
registry.close()
|
||||
|
||||
def test_chain_search_depth(self, chain_engine_setup):
|
||||
"""Test chain search at different depths."""
|
||||
print("\n" + "="*60)
|
||||
print("CHAIN SEARCH - DEPTH VARIATION")
|
||||
print("="*60)
|
||||
|
||||
engine = chain_engine_setup["engine"]
|
||||
root = chain_engine_setup["root"]
|
||||
|
||||
depths = [0, 1, 2, -1] # -1 = unlimited
|
||||
|
||||
for depth in depths:
|
||||
options = SearchOptions(depth=depth, max_workers=4, total_limit=50)
|
||||
result = benchmark(
|
||||
lambda d=depth, o=options: engine.search("function", root, o),
|
||||
iterations=10
|
||||
)
|
||||
result.name = f"depth={depth}"
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_chain_search_parallelism(self, chain_engine_setup):
|
||||
"""Test chain search with different worker counts."""
|
||||
print("\n" + "="*60)
|
||||
print("CHAIN SEARCH - PARALLELISM")
|
||||
print("="*60)
|
||||
|
||||
engine = chain_engine_setup["engine"]
|
||||
root = chain_engine_setup["root"]
|
||||
|
||||
worker_counts = [1, 2, 4, 8]
|
||||
|
||||
for workers in worker_counts:
|
||||
options = SearchOptions(depth=-1, max_workers=workers, total_limit=50)
|
||||
result = benchmark(
|
||||
lambda w=workers, o=options: engine.search("function", root, o),
|
||||
iterations=10
|
||||
)
|
||||
result.name = f"workers={workers}"
|
||||
print(f"\n{result}")
|
||||
|
||||
|
||||
# === Semantic Search Performance Tests ===
|
||||
|
||||
class TestSemanticSearchPerformance:
|
||||
"""Semantic search performance benchmarks."""
|
||||
|
||||
@pytest.fixture
|
||||
def semantic_setup(self, temp_dir):
|
||||
"""Setup semantic search with embeddings."""
|
||||
try:
|
||||
from codexlens.semantic import SEMANTIC_AVAILABLE
|
||||
if not SEMANTIC_AVAILABLE:
|
||||
pytest.skip("Semantic search dependencies not installed")
|
||||
|
||||
from codexlens.semantic.embedder import Embedder
|
||||
from codexlens.semantic.vector_store import VectorStore
|
||||
from codexlens.entities import SemanticChunk
|
||||
|
||||
embedder = Embedder()
|
||||
db_path = temp_dir / "semantic.db"
|
||||
vector_store = VectorStore(db_path)
|
||||
|
||||
# Add test chunks
|
||||
code_samples = [
|
||||
"def authenticate_user(username, password): verify user credentials",
|
||||
"class DatabaseConnection: manage database connections with pooling",
|
||||
"async def fetch_api_data(url): make HTTP request and return JSON",
|
||||
"function renderComponent(props): render React UI component",
|
||||
"def process_data(input): transform and validate input data",
|
||||
] * 50 # 250 chunks
|
||||
|
||||
for i, content in enumerate(code_samples):
|
||||
chunk = SemanticChunk(
|
||||
content=content,
|
||||
metadata={"index": i, "language": "python"}
|
||||
)
|
||||
chunk.embedding = embedder.embed_single(content)
|
||||
vector_store.add_chunk(chunk, f"/test/file_{i}.py")
|
||||
|
||||
yield {
|
||||
"embedder": embedder,
|
||||
"vector_store": vector_store,
|
||||
}
|
||||
|
||||
# Clean up vector store cache
|
||||
vector_store.clear_cache()
|
||||
|
||||
except ImportError:
|
||||
pytest.skip("Semantic search dependencies not installed")
|
||||
|
||||
def test_embedding_generation_speed(self, semantic_setup):
|
||||
"""Benchmark embedding generation speed."""
|
||||
print("\n" + "="*60)
|
||||
print("SEMANTIC SEARCH - EMBEDDING GENERATION")
|
||||
print("="*60)
|
||||
|
||||
embedder = semantic_setup["embedder"]
|
||||
|
||||
# Single embedding
|
||||
result = benchmark(
|
||||
lambda: embedder.embed_single("def example_function(): return 42"),
|
||||
iterations=50
|
||||
)
|
||||
result.name = "single embedding"
|
||||
print(f"\n{result}")
|
||||
|
||||
# Batch embedding
|
||||
texts = ["def func{}(): return {}".format(i, i) for i in range(10)]
|
||||
result = benchmark(
|
||||
lambda: embedder.embed(texts),
|
||||
iterations=20
|
||||
)
|
||||
result.name = "batch embedding (10 texts)"
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_vector_search_speed(self, semantic_setup):
|
||||
"""Benchmark vector similarity search speed."""
|
||||
print("\n" + "="*60)
|
||||
print("SEMANTIC SEARCH - VECTOR SEARCH")
|
||||
print("="*60)
|
||||
|
||||
embedder = semantic_setup["embedder"]
|
||||
vector_store = semantic_setup["vector_store"]
|
||||
|
||||
query_embedding = embedder.embed_single("user authentication login")
|
||||
|
||||
# Different top_k values
|
||||
for top_k in [5, 10, 20, 50]:
|
||||
result = benchmark(
|
||||
lambda k=top_k: vector_store.search_similar(query_embedding, top_k=k),
|
||||
iterations=30
|
||||
)
|
||||
result.name = f"top_k={top_k}"
|
||||
print(f"\n{result}")
|
||||
|
||||
def test_full_semantic_search_latency(self, semantic_setup):
|
||||
"""Benchmark full semantic search (embed + search)."""
|
||||
print("\n" + "="*60)
|
||||
print("SEMANTIC SEARCH - FULL LATENCY")
|
||||
print("="*60)
|
||||
|
||||
embedder = semantic_setup["embedder"]
|
||||
vector_store = semantic_setup["vector_store"]
|
||||
|
||||
queries = [
|
||||
"user authentication",
|
||||
"database connection",
|
||||
"API request handler",
|
||||
"React component",
|
||||
"data processing",
|
||||
]
|
||||
|
||||
for query in queries:
|
||||
def full_search(q=query):
|
||||
embedding = embedder.embed_single(q)
|
||||
return vector_store.search_similar(embedding, top_k=10)
|
||||
|
||||
result = benchmark(full_search, iterations=20)
|
||||
result.name = f"'{query}'"
|
||||
print(f"\n{result}")
|
||||
|
||||
|
||||
# === Comparative Benchmarks ===
|
||||
|
||||
class TestComparativeBenchmarks:
|
||||
"""Compare FTS5 vs Semantic search performance."""
|
||||
|
||||
@pytest.fixture
|
||||
def comparison_setup(self, temp_dir):
|
||||
"""Setup both FTS5 and semantic stores with same content."""
|
||||
# FTS5 store
|
||||
fts_store = SQLiteStore(temp_dir / "fts_compare.db")
|
||||
fts_store.initialize()
|
||||
|
||||
code_samples = [
|
||||
("auth.py", "def authenticate_user(username, password): verify credentials"),
|
||||
("db.py", "class DatabasePool: manage database connection pooling"),
|
||||
("api.py", "async def handle_request(req): process API request"),
|
||||
("ui.py", "function Button({ onClick }): render button component"),
|
||||
("utils.py", "def process_data(input): transform and validate data"),
|
||||
] * 20
|
||||
|
||||
for i, (filename, content) in enumerate(code_samples):
|
||||
indexed_file = IndexedFile(
|
||||
path=f"/project/{filename.replace('.py', '')}_{i}.py",
|
||||
language="python",
|
||||
symbols=[Symbol(name=f"func_{i}", kind="function", range=(1, 5))],
|
||||
)
|
||||
fts_store.add_file(indexed_file, content)
|
||||
|
||||
# Semantic store (if available)
|
||||
try:
|
||||
from codexlens.semantic import SEMANTIC_AVAILABLE
|
||||
if SEMANTIC_AVAILABLE:
|
||||
from codexlens.semantic.embedder import Embedder
|
||||
from codexlens.semantic.vector_store import VectorStore
|
||||
from codexlens.entities import SemanticChunk
|
||||
|
||||
embedder = Embedder()
|
||||
semantic_store = VectorStore(temp_dir / "semantic_compare.db")
|
||||
|
||||
for i, (filename, content) in enumerate(code_samples):
|
||||
chunk = SemanticChunk(content=content, metadata={"index": i})
|
||||
chunk.embedding = embedder.embed_single(content)
|
||||
semantic_store.add_chunk(chunk, f"/project/{filename}")
|
||||
|
||||
yield {
|
||||
"fts_store": fts_store,
|
||||
"semantic_store": semantic_store,
|
||||
"embedder": embedder,
|
||||
"has_semantic": True,
|
||||
}
|
||||
# Close semantic store connection
|
||||
semantic_store.clear_cache()
|
||||
else:
|
||||
yield {"fts_store": fts_store, "has_semantic": False}
|
||||
except ImportError:
|
||||
yield {"fts_store": fts_store, "has_semantic": False}
|
||||
|
||||
fts_store.close()
|
||||
|
||||
def test_fts_vs_semantic_latency(self, comparison_setup):
|
||||
"""Compare FTS5 vs Semantic search latency."""
|
||||
print("\n" + "="*60)
|
||||
print("FTS5 vs SEMANTIC - LATENCY COMPARISON")
|
||||
print("="*60)
|
||||
|
||||
fts_store = comparison_setup["fts_store"]
|
||||
|
||||
queries = [
|
||||
"authenticate",
|
||||
"database",
|
||||
"request",
|
||||
"button",
|
||||
"process",
|
||||
]
|
||||
|
||||
print("\nFTS5 Search:")
|
||||
for query in queries:
|
||||
result = benchmark(
|
||||
lambda q=query: fts_store.search_fts(q, limit=10),
|
||||
iterations=30
|
||||
)
|
||||
result.name = f"'{query}'"
|
||||
print(f" {result.name}: avg={result.avg_time_ms:.2f}ms")
|
||||
|
||||
if comparison_setup.get("has_semantic"):
|
||||
semantic_store = comparison_setup["semantic_store"]
|
||||
embedder = comparison_setup["embedder"]
|
||||
|
||||
print("\nSemantic Search (embed + search):")
|
||||
for query in queries:
|
||||
def semantic_search(q=query):
|
||||
emb = embedder.embed_single(q)
|
||||
return semantic_store.search_similar(emb, top_k=10)
|
||||
|
||||
result = benchmark(semantic_search, iterations=20)
|
||||
result.name = f"'{query}'"
|
||||
print(f" {result.name}: avg={result.avg_time_ms:.2f}ms")
|
||||
else:
|
||||
print("\n(Semantic search not available)")
|
||||
|
||||
|
||||
# === Memory Usage Tests ===
|
||||
|
||||
class TestMemoryUsage:
|
||||
"""Memory usage during search operations."""
|
||||
|
||||
def test_search_memory_footprint(self, medium_store):
|
||||
"""Measure memory footprint during search."""
|
||||
print("\n" + "="*60)
|
||||
print("MEMORY USAGE - SEARCH OPERATIONS")
|
||||
print("="*60)
|
||||
|
||||
import tracemalloc
|
||||
|
||||
tracemalloc.start()
|
||||
|
||||
# Run multiple searches
|
||||
for _ in range(100):
|
||||
medium_store.search_fts("function", limit=20)
|
||||
|
||||
current, peak = tracemalloc.get_traced_memory()
|
||||
tracemalloc.stop()
|
||||
|
||||
print(f"\nAfter 100 FTS5 searches:")
|
||||
print(f" Current memory: {current / 1024 / 1024:.2f} MB")
|
||||
print(f" Peak memory: {peak / 1024 / 1024:.2f} MB")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v", "-s", "--tb=short"])
|
||||
Reference in New Issue
Block a user