feat: Add method to retrieve all semantic chunks from the vector store

- Implemented `get_all_chunks` method in `VectorStore` class to fetch all semantic chunks from the database.
- Added a new benchmark script `analyze_methods.py` for analyzing hybrid search methods and storage architecture.
- Included detailed analysis of method contributions, storage conflicts, and FTS + Rerank fusion experiments.
- Updated results JSON structure to reflect new analysis outputs and method performance metrics.
Author: catlog22
Date: 2026-01-02 12:32:43 +08:00
Parent: 9129c981a4
Commit: 56c03c847a
4 changed files with 1256 additions and 0 deletions


@@ -1033,6 +1033,28 @@ class VectorStore:
            row = conn.execute("SELECT COUNT(*) FROM semantic_chunks").fetchone()
            return row[0] if row else 0

    def get_all_chunks(self) -> List[SemanticChunk]:
        """Get all chunks from the store.

        Returns:
            List of SemanticChunk objects with id and content.
        """
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(
                "SELECT id, file_path, content, metadata FROM semantic_chunks"
            ).fetchall()

        chunks = []
        for row in rows:
            chunks.append(SemanticChunk(
                id=row["id"],
                content=row["content"],
                file_path=row["file_path"],
                metadata=json.loads(row["metadata"]) if row["metadata"] else None,
            ))
        return chunks

    def clear_cache(self) -> None:
        """Manually clear the embedding cache."""
        self._invalidate_cache()
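For context, a hypothetical sketch of how a benchmark script such as analyze_methods.py might consume the new method. The import path and the VectorStore constructor arguments are assumptions; only get_all_chunks comes from this diff.

# Hypothetical usage sketch; import path and constructor signature are assumed.
from vector_store import VectorStore  # assumed module path

store = VectorStore(db_path="index.db")  # assumed constructor arguments
chunks = store.get_all_chunks()          # method added in this commit
print(f"Loaded {len(chunks)} chunks")

# Build an in-memory corpus keyed by chunk id for downstream search experiments.
corpus = {chunk.id: chunk.content for chunk in chunks}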