mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-19 18:58:47 +08:00
Major improvements to smart-search, chain-search cascade, ranking pipeline, reranker factory, CLI history store, codex-lens integration, and uv-manager. Simplify command-generator skill by inlining phases. Add comprehensive tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
205 lines
7.1 KiB
Python
205 lines
7.1 KiB
Python
import gc
|
|
import gc
|
|
import shutil
|
|
import sqlite3
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
import codexlens.cli.embedding_manager as embedding_manager
|
|
from codexlens.cli.embedding_manager import get_embedding_stats_summary, get_embeddings_status
|
|
|
|
|
|
@pytest.fixture
def status_temp_dir() -> Path:
    """Yield a fresh temporary directory and remove it after the test.

    Removal is best-effort: a garbage-collection pass runs first, then up to
    five removal attempts are made, pausing 0.1 s after each
    ``PermissionError``. If every attempt fails the directory is left behind.
    """
    scratch_dir = Path(tempfile.mkdtemp())
    try:
        yield scratch_dir
    finally:
        # NOTE(review): presumably the collection pass releases lingering
        # SQLite handles so the files can be deleted — confirm.
        gc.collect()
        attempts_left = 5
        while attempts_left > 0:
            attempts_left -= 1
            try:
                if scratch_dir.exists():
                    shutil.rmtree(scratch_dir)
            except PermissionError:
                time.sleep(0.1)
            else:
                # Either removed successfully or already gone.
                break
|
|
|
|
|
|
def _create_index_db(index_path: Path, files: list[str], embedded_files: list[str] | None = None) -> None:
|
|
index_path.parent.mkdir(parents=True, exist_ok=True)
|
|
with sqlite3.connect(index_path) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute(
|
|
"""
|
|
CREATE TABLE files (
|
|
id INTEGER PRIMARY KEY,
|
|
path TEXT NOT NULL UNIQUE,
|
|
content TEXT,
|
|
language TEXT,
|
|
hash TEXT
|
|
)
|
|
"""
|
|
)
|
|
cursor.executemany(
|
|
"INSERT INTO files (path, content, language, hash) VALUES (?, ?, ?, ?)",
|
|
[(file_path, "", "python", f"hash-{idx}") for idx, file_path in enumerate(files)],
|
|
)
|
|
|
|
if embedded_files is not None:
|
|
cursor.execute(
|
|
"""
|
|
CREATE TABLE semantic_chunks (
|
|
id INTEGER PRIMARY KEY,
|
|
file_path TEXT NOT NULL,
|
|
content TEXT,
|
|
embedding BLOB,
|
|
metadata TEXT,
|
|
category TEXT
|
|
)
|
|
"""
|
|
)
|
|
cursor.executemany(
|
|
"INSERT INTO semantic_chunks (file_path, content, embedding, metadata, category) VALUES (?, ?, ?, ?, ?)",
|
|
[(file_path, "chunk", b"vec", "{}", "code") for file_path in embedded_files],
|
|
)
|
|
conn.commit()
|
|
|
|
|
|
def _create_vectors_meta_db(meta_path: Path, embedded_files: list[str], binary_vector_count: int = 0) -> None:
    """Create a minimal centralized vectors-metadata SQLite database for tests.

    Args:
        meta_path: Location of the database file. Missing parent
            directories are created.
        embedded_files: Paths inserted into ``chunk_metadata``, one
            placeholder chunk per entry with ``chunk_id`` starting at 1.
            Each row's ``source_index_db`` points at ``_index.db`` next to
            ``meta_path``.
        binary_vector_count: Number of rows inserted into ``binary_vectors``
            (chunk ids ``1..binary_vector_count``, each a one-byte vector).
    """
    meta_path.parent.mkdir(parents=True, exist_ok=True)

    # The sqlite3 connection context manager only commits or rolls back; it
    # does not close the connection. Close it explicitly so no file handle
    # outlives this helper and blocks removal of the temporary directory.
    conn = sqlite3.connect(meta_path)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
            CREATE TABLE chunk_metadata (
                chunk_id INTEGER PRIMARY KEY,
                file_path TEXT NOT NULL,
                content TEXT,
                start_line INTEGER,
                end_line INTEGER,
                category TEXT,
                metadata TEXT,
                source_index_db TEXT
            )
            """
        )
        cursor.execute(
            """
            CREATE TABLE binary_vectors (
                chunk_id INTEGER PRIMARY KEY,
                vector BLOB NOT NULL
            )
            """
        )
        cursor.executemany(
            """
            INSERT INTO chunk_metadata (
                chunk_id, file_path, content, start_line, end_line, category, metadata, source_index_db
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            [
                (idx, file_path, "chunk", 1, 1, "code", "{}", str(meta_path.parent / "_index.db"))
                for idx, file_path in enumerate(embedded_files, start=1)
            ],
        )
        cursor.executemany(
            "INSERT INTO binary_vectors (chunk_id, vector) VALUES (?, ?)",
            [(idx, b"\x01") for idx in range(1, binary_vector_count + 1)],
        )
        conn.commit()
    finally:
        # Closing without a commit discards any partial work, matching the
        # rollback the context manager would have performed on error.
        conn.close()
|
|
|
|
|
|
def test_root_status_does_not_inherit_child_embeddings(
    monkeypatch: pytest.MonkeyPatch, status_temp_dir: Path
) -> None:
    """Root-level status stays empty when only a child index has embeddings.

    The root index holds two files and no chunks, while the ``child`` index
    holds one embedded file. The child's embeddings are expected to appear
    only in the ``subtree`` section of the result.
    """
    root_dir = status_temp_dir / "workspace"
    root_dir.mkdir()
    _create_index_db(root_dir / "_index.db", ["a.py", "b.py"])
    _create_index_db(root_dir / "child" / "_index.db", ["child.py"], embedded_files=["child.py"])

    def _fake_model_info(index_path):
        # Only the child index reports model metadata; every other index
        # reports none.
        if index_path.parent.name != "child":
            return None
        return {
            "model_profile": "fast",
            "model_name": "unit-test-model",
            "embedding_dim": 384,
            "backend": "fastembed",
            "created_at": "2026-03-13T00:00:00Z",
            "updated_at": "2026-03-13T00:00:00Z",
        }

    monkeypatch.setattr(embedding_manager, "_get_model_info_from_index", _fake_model_info)

    response = get_embeddings_status(root_dir)
    assert response["success"] is True

    payload = response["result"]
    # Root-level figures must ignore the child's embeddings.
    assert payload["coverage_percent"] == 0.0
    assert payload["files_with_embeddings"] == 0
    assert payload["root"]["has_embeddings"] is False
    assert payload["model_info"] is None
    # Subtree figures are where the child's embeddings show up.
    assert payload["subtree"]["indexes_with_embeddings"] == 1
    assert payload["subtree"]["coverage_percent"] > 0
|
|
|
|
|
|
def test_root_status_uses_validated_centralized_metadata(status_temp_dir: Path) -> None:
    """Root status reports centralized embeddings when metadata rows exist.

    The workspace has a two-file root index, a vectors-metadata database
    with one chunk for ``a.py``, and a placeholder ``_vectors.hnsw`` file.
    """
    root_dir = status_temp_dir / "workspace"
    root_dir.mkdir()
    _create_index_db(root_dir / "_index.db", ["a.py", "b.py"])
    _create_vectors_meta_db(root_dir / "_vectors_meta.db", ["a.py"])
    hnsw_file = root_dir / "_vectors.hnsw"
    hnsw_file.write_bytes(b"hnsw")

    response = get_embeddings_status(root_dir)
    assert response["success"] is True

    payload = response["result"]
    # One of the two indexed files has a chunk, hence 50% coverage.
    assert payload["coverage_percent"] == 50.0
    assert payload["files_with_embeddings"] == 1
    assert payload["total_chunks"] == 1
    assert payload["root"]["has_embeddings"] is True
    assert payload["root"]["storage_mode"] == "centralized"
    assert payload["centralized"]["dense_ready"] is True
    assert payload["centralized"]["usable"] is True
|
|
|
|
|
|
def test_embedding_stats_summary_skips_ignored_artifact_indexes(status_temp_dir: Path) -> None:
    """The stats summary lists only the root and ``src`` indexes.

    Indexes are also created under ``dist`` and ``.workflow`` (both with
    embedded chunks); the summary is expected to leave them out.
    """
    root_dir = status_temp_dir / "workspace"
    root_dir.mkdir()
    _create_index_db(root_dir / "_index.db", ["root.py"])
    _create_index_db(root_dir / "src" / "_index.db", ["src.py"])
    _create_index_db(root_dir / "dist" / "_index.db", ["bundle.py"], embedded_files=["bundle.py"])
    _create_index_db(root_dir / ".workflow" / "_index.db", ["trace.py"], embedded_files=["trace.py"])

    response = get_embedding_stats_summary(root_dir)

    assert response["success"] is True
    payload = response["result"]
    assert payload["total_indexes"] == 2

    # Compare workspace-relative POSIX paths so the check does not depend on
    # the temporary directory's location.
    reported_indexes = set()
    for entry in payload["indexes"]:
        reported_indexes.add(Path(entry["path"]).relative_to(root_dir).as_posix())
    assert reported_indexes == {"_index.db", "src/_index.db"}
|
|
|
|
|
|
def test_root_status_ignores_empty_centralized_artifacts(status_temp_dir: Path) -> None:
    """Centralized artifact files without metadata rows do not count as embeddings.

    The vectors-metadata database is created with no chunk rows and no
    binary vectors, while placeholder ``_vectors.hnsw`` and
    ``_binary_vectors.mmap`` files exist on disk.
    """
    root_dir = status_temp_dir / "workspace"
    root_dir.mkdir()
    _create_index_db(root_dir / "_index.db", ["a.py", "b.py"])
    _create_vectors_meta_db(root_dir / "_vectors_meta.db", [])
    for artifact_name, placeholder in (("_vectors.hnsw", b"hnsw"), ("_binary_vectors.mmap", b"mmap")):
        (root_dir / artifact_name).write_bytes(placeholder)

    response = get_embeddings_status(root_dir)
    assert response["success"] is True

    payload = response["result"]
    assert payload["coverage_percent"] == 0.0
    assert payload["files_with_embeddings"] == 0
    assert payload["root"]["has_embeddings"] is False
    # The artifact files are present, but the metadata tables are empty.
    assert payload["centralized"]["chunk_metadata_rows"] == 0
    assert payload["centralized"]["binary_vector_rows"] == 0
    assert payload["centralized"]["usable"] is False
|