fix(search): handle path operation failures in symbol filtering

Adds robust exception handling for os.path.commonpath() in search_symbols()
to prevent crashes on malformed paths and Windows cross-drive scenarios.
Invalid symbols are skipped with debug logging, and the search continues.

Solution-ID: SOL-1735385400004
Issue-ID: ISS-1766921318981-4
Task-ID: T1
This commit is contained in:
catlog22
2025-12-29 18:59:10 +08:00
parent 5d5652c2c5
commit 6a73d3c379
2 changed files with 111 additions and 1 deletions

View File

@@ -337,12 +337,41 @@ class ChainSearchEngine:
root_cmp = root_str.lower().rstrip("\\/")
dir_cmp = file_dir_str.lower().rstrip("\\/")
# Guard against Windows cross-drive comparisons (ValueError).
if os.name == "nt":
root_drive, _ = os.path.splitdrive(root_cmp)
dir_drive, _ = os.path.splitdrive(dir_cmp)
if root_drive and dir_drive and root_drive != dir_drive:
self.logger.debug(
"Skipping symbol due to cross-drive path (root=%s file=%s name=%s)",
root_cmp,
sym.file,
sym.name,
)
continue
if os.path.commonpath([root_cmp, dir_cmp]) != root_cmp:
continue
rel = os.path.relpath(dir_cmp, root_cmp)
rel_depth = 0 if rel == "." else len(rel.split(os.sep))
except Exception:
except ValueError as exc:
self.logger.debug(
"Skipping symbol due to path operation failure (root=%s file=%s name=%s): %s",
str(search_root),
sym.file,
sym.name,
exc,
)
continue
except Exception as exc:
self.logger.debug(
"Skipping symbol due to unexpected path error (root=%s file=%s name=%s): %s",
str(search_root),
sym.file,
sym.name,
exc,
)
continue
if options.depth >= 0 and rel_depth > options.depth:

View File

@@ -0,0 +1,81 @@
import logging
import os
import tempfile
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from codexlens.config import Config
from codexlens.entities import Symbol
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.storage.global_index import GlobalSymbolIndex
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
@pytest.fixture()
def temp_paths():
    """Yield a fresh temporary directory as a ``Path`` and remove it afterwards.

    Removal is best-effort: the directory is created with
    ``ignore_cleanup_errors=True`` and any ``PermissionError``/``OSError``
    raised by the explicit cleanup call is swallowed.
    """
    scratch = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
    yield Path(scratch.name)
    try:
        scratch.cleanup()
    except (PermissionError, OSError):
        # Best-effort teardown: a directory that cannot be removed is left behind.
        pass
def test_symbol_filtering_handles_path_failures(monkeypatch: pytest.MonkeyPatch, caplog, temp_paths: Path) -> None:
    """Check that ``search_symbols`` drops symbols with unusable paths and logs them.

    ``GlobalSymbolIndex.search`` is patched to return one symbol located under
    the project root plus several whose ``file`` values are problematic (an
    embedded NUL, a relative path and, on Windows only, a path on another
    drive). The test asserts that only the valid symbol is returned and that
    the name of every other candidate appears in the captured DEBUG log.
    """
    running_on_windows = os.name == "nt"

    # Source tree that the search is rooted at.
    project_root = temp_paths / "project"
    src_dir = project_root / "src"
    src_dir.mkdir(parents=True, exist_ok=True)

    # Empty placeholder index database for the project.
    mapper = PathMapper(index_root=temp_paths / "indexes")
    index_db_path = mapper.source_to_index_db(project_root)
    index_db_path.parent.mkdir(parents=True, exist_ok=True)
    index_db_path.write_text("", encoding="utf-8")  # existence is enough for _find_start_index

    # Register the project and initialise its global symbol index.
    registry = RegistryStore(db_path=temp_paths / "registry.db")
    registry.initialize()
    index_dir = mapper.source_to_index_dir(project_root)
    project_info = registry.register_project(project_root, index_dir)
    global_index = GlobalSymbolIndex(
        project_info.index_root / GlobalSymbolIndex.DEFAULT_DB_NAME,
        project_id=project_info.id,
    )
    global_index.initialize()

    def class_symbol(name: str, file: str) -> Symbol:
        # All candidates differ only in their name and file path.
        return Symbol(name=name, kind="class", range=(1, 2), file=file)

    stubbed_results = [
        class_symbol("AuthManager", str(src_dir / "auth.py")),
        class_symbol("BadNull", "bad\0path.py"),
        class_symbol("BadRelative", "relative/path.py"),
    ]
    if running_on_windows:
        # Pick a drive letter that differs from the one hosting the project.
        project_drive = os.path.splitdrive(str(project_root.resolve()))[0]
        foreign_drive = "D:" if project_drive.lower() == "c:" else "C:"
        stubbed_results.append(class_symbol("CrossDrive", f"{foreign_drive}\\other\\file.py"))

    def fake_search(self, name: str, kind=None, limit: int = 20, prefix_mode: bool = False):
        # Ignore the query entirely and hand back the prepared candidates.
        return stubbed_results

    monkeypatch.setattr(GlobalSymbolIndex, "search", fake_search)

    engine = ChainSearchEngine(
        registry,
        mapper,
        config=Config(data_dir=temp_paths / "data", global_symbol_index_enabled=True),
    )
    # Any call to the chain traversal makes the test fail.
    engine._search_symbols_parallel = MagicMock(side_effect=AssertionError("should not traverse chain"))

    caplog.set_level(logging.DEBUG, logger="codexlens.search.chain_search")
    search_options = SearchOptions(depth=5, total_limit=10)
    symbols = engine.search_symbols("Auth", project_root, options=search_options)

    assert [found.name for found in symbols] == ["AuthManager"]

    expected_in_log = ["BadNull", "BadRelative"]
    if running_on_windows:
        expected_in_log.append("CrossDrive")
    for skipped_name in expected_in_log:
        assert skipped_name in caplog.text