feat: Add comprehensive tests for contentPattern and glob pattern matching

- Implemented final verification tests for contentPattern to validate behavior with empty strings, dangerous patterns, and normal patterns.
- Created glob pattern matching tests to verify regex conversion and matching functionality.
- Developed infinite loop risk tests using Worker threads to isolate potential blocking operations.
- Introduced optimized contentPattern tests to validate improvements in the findMatches function.
- Added verification tests to assess the effectiveness of contentPattern optimizations.
- Conducted safety tests for contentPattern to identify edge cases and potential vulnerabilities.
- Implemented unrestricted loop tests to analyze infinite loop risks without match limits.
- Developed tests for zero-width pattern detection logic to ensure proper handling of dangerous regex patterns.
This commit is contained in:
catlog22
2026-02-09 11:13:01 +08:00
parent dfe153778c
commit 964292ebdb
62 changed files with 7588 additions and 374 deletions

View File

@@ -72,7 +72,10 @@ def httpx_clients(monkeypatch: pytest.MonkeyPatch) -> list[DummyClient]:
# Checks that constructing APIReranker() with no usable API key raises
# ValueError whose message matches "Missing API key".
def test_api_reranker_requires_api_key(
monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient]
) -> None:
# NOTE(review): this delenv is immediately overridden by the setenv call for
# the same variable below; in this diff view it is presumably the removed
# side of the hunk (header reads -72,7 +72,10) -- confirm against the file.
monkeypatch.delenv("RERANKER_API_KEY", raising=False)
# Force empty key in-process so the reranker does not fall back to any
# workspace/global .env configuration that may exist on the machine.
monkeypatch.setenv("RERANKER_API_KEY", "")
monkeypatch.setenv("CODEXLENS_RERANKER_API_KEY", "")
# With both key variables set to the empty string, construction must fail.
with pytest.raises(ValueError, match="Missing API key"):
APIReranker()
@@ -92,10 +95,37 @@ def test_api_reranker_reads_api_key_from_env(
assert httpx_clients[0].closed is True
def test_api_reranker_strips_v1_from_api_base_to_avoid_double_v1(
    monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient]
) -> None:
    """An ``api_base`` ending in ``/v1`` yields a client base URL without it."""
    monkeypatch.setenv("RERANKER_API_KEY", "test-key")

    reranker = APIReranker(
        api_base="https://api.siliconflow.cn/v1",
        provider="siliconflow",
    )

    # Exactly one HTTP client is expected to have been created.
    assert len(httpx_clients) == 1
    created_client = httpx_clients[0]
    # The endpoint path already carries /v1, so the base URL must not repeat it.
    assert created_client.base_url == "https://api.siliconflow.cn"

    reranker.close()
def test_api_reranker_strips_endpoint_from_api_base_to_avoid_double_endpoint(
    monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient]
) -> None:
    """An ``api_base`` ending in ``/v1/rerank`` yields a bare host base URL."""
    monkeypatch.setenv("RERANKER_API_KEY", "test-key")

    reranker = APIReranker(
        api_base="https://api.siliconflow.cn/v1/rerank",
        provider="siliconflow",
    )

    # Exactly one HTTP client is expected to have been created.
    assert len(httpx_clients) == 1
    created_client = httpx_clients[0]
    # When the caller already appended the endpoint suffix, it must be removed.
    assert created_client.base_url == "https://api.siliconflow.cn"

    reranker.close()
def test_api_reranker_scores_pairs_siliconflow(
monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient]
) -> None:
monkeypatch.delenv("RERANKER_API_KEY", raising=False)
# Avoid picking up any machine-local default model from global .env.
monkeypatch.setenv("RERANKER_MODEL", "")
monkeypatch.setenv("CODEXLENS_RERANKER_MODEL", "")
reranker = APIReranker(api_key="k", provider="siliconflow")
client = httpx_clients[0]
@@ -168,4 +198,3 @@ def test_factory_api_backend_constructs_reranker(
reranker = get_reranker(backend="api")
assert isinstance(reranker, APIReranker)
assert len(httpx_clients) == 1

View File

@@ -231,6 +231,26 @@ class TestHDBSCANStrategy:
assert all_indices == set(range(len(sample_results)))
def test_cluster_supports_cosine_metric(
    self, sample_results: List[SearchResult], mock_embeddings
):
    """Test HDBSCANStrategy can run with metric='cosine' (via precomputed distances)."""
    # Skip rather than fail when the strategy cannot be imported.
    try:
        from codexlens.search.clustering import HDBSCANStrategy
    except ImportError:
        pytest.skip("hdbscan not installed")

    cosine_config = ClusteringConfig(
        min_cluster_size=2,
        min_samples=1,
        metric="cosine",
    )
    clusters = HDBSCANStrategy(cosine_config).cluster(mock_embeddings, sample_results)

    # Every result index must appear in the union of the returned clusters.
    covered = {index for cluster in clusters for index in cluster}
    assert covered == set(range(len(sample_results)))
def test_cluster_empty_results(self, hdbscan_strategy):
"""Test cluster() with empty results."""
import numpy as np