refactor: rename package codexlens -> codexlens_search for independent distribution

Rename the v2 search engine package to `codexlens-search` (import as
`codexlens_search`) so it can be installed independently and consumed
by the original codex-lens as a dependency. This avoids package path
conflicts since both previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline,
  IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-16 23:23:22 +08:00
parent a0a50d338a
commit 6712965b7f
32 changed files with 83 additions and 63 deletions

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
import sqlite3
from pathlib import Path
class FTSEngine:
    """Full-text index over documents, backed by SQLite FTS5.

    Document text is stored in the ``docs`` FTS5 virtual table (tokenizer
    ``porter unicode61``) using the document id as ``rowid``. The side table
    ``docs_meta`` maps each id to its path.

    The connection is opened with ``check_same_thread=False``, so sqlite3
    does not enforce same-thread use; this class adds no locking of its own.
    """

    # Shared by exact_search and fuzzy_search. bm25() yields lower values for
    # better matches, so ascending order puts the best match first.
    _MATCH_SQL = (
        "SELECT rowid, bm25(docs) AS score FROM docs "
        "WHERE docs MATCH ? ORDER BY score LIMIT ?"
    )

    def __init__(self, db_path: str | Path) -> None:
        """Open (or create) the database at *db_path* and ensure the tables exist.

        Args:
            db_path: Location of the SQLite database file; converted with
                ``str()`` before being passed to ``sqlite3.connect``.
        """
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
        self._conn.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS docs "
            "USING fts5(content, tokenize='porter unicode61')"
        )
        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS docs_meta "
            "(id INTEGER PRIMARY KEY, path TEXT)"
        )
        self._conn.commit()

    def __enter__(self) -> "FTSEngine":
        """Return the engine itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the connection when the ``with`` block exits."""
        self.close()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()

    def add_documents(self, docs: list[tuple[int, str, str]]) -> None:
        """Insert or replace a batch of documents atomically.

        Args:
            docs: ``(id, path, content)`` tuples. A row whose id already
                exists is replaced in both tables. An empty batch is a no-op.

        Raises:
            sqlite3.Error: If any insert fails. The whole batch is rolled
                back, so no partially written batch is left pending.
        """
        if not docs:
            return
        meta_rows = [(doc_id, path) for doc_id, path, _ in docs]
        content_rows = [(doc_id, content) for doc_id, _, content in docs]
        # The connection as a context manager commits on success and rolls
        # back on an exception, keeping docs_meta and docs in step.
        with self._conn:
            self._conn.executemany(
                "INSERT OR REPLACE INTO docs_meta (id, path) VALUES (?, ?)",
                meta_rows,
            )
            self._conn.executemany(
                "INSERT OR REPLACE INTO docs (rowid, content) VALUES (?, ?)",
                content_rows,
            )

    def _match(self, match_expr: str, top_k: int) -> list[tuple[int, float]]:
        """Run an FTS5 MATCH expression and return ``(id, score)`` pairs, best first."""
        try:
            rows = self._conn.execute(
                self._MATCH_SQL, (match_expr, top_k)
            ).fetchall()
        except sqlite3.OperationalError:
            # Best-effort search: an expression FTS5 rejects yields no hits
            # instead of propagating the error to the caller.
            return []
        # Negate bm25 so that a higher score means a better match.
        return [(int(rowid), -float(score)) for rowid, score in rows]

    def exact_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
        """Run *query* as a raw FTS5 MATCH expression.

        Args:
            query: FTS5 query string, passed to MATCH unmodified.
            top_k: Value bound to the SQL ``LIMIT``.

        Returns:
            ``(id, score)`` pairs sorted by score descending, where score is
            the negated bm25 value. Returns ``[]`` if SQLite raises
            ``OperationalError`` for the query.
        """
        return self._match(query, top_k)

    def fuzzy_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
        """Prefix search: every whitespace-separated token is matched as a prefix.

        Each token is wrapped in an FTS5 string literal before ``*`` is
        appended, so punctuation inside a token (``foo.bar``, ``my-func``, a
        stray quote) is handed to the tokenizer as text rather than being
        parsed as FTS5 query syntax.

        Args:
            query: Free-text query; split on whitespace.
            top_k: Value bound to the SQL ``LIMIT``.

        Returns:
            ``(id, score)`` pairs sorted by score descending, or ``[]`` when
            *query* contains no tokens.
        """
        tokens = query.split()
        if not tokens:
            return []
        # Inside an FTS5 string a double quote is escaped by doubling it.
        prefix_query = " ".join(
            '"' + token.replace('"', '""') + '"*' for token in tokens
        )
        return self._match(prefix_query, top_k)

    def get_content(self, doc_id: int) -> str:
        """Return the stored content for *doc_id*, or ``""`` if there is none."""
        row = self._conn.execute(
            "SELECT content FROM docs WHERE rowid = ?", (doc_id,)
        ).fetchone()
        return row[0] if row else ""