feat: Add method to retrieve all semantic chunks from the vector store

- Implemented `get_all_chunks` method in `VectorStore` class to fetch all semantic chunks from the database.
- Added a new benchmark script `analyze_methods.py` for analyzing hybrid search methods and storage architecture.
- Included detailed analysis of method contributions, storage conflicts, and FTS + Rerank fusion experiments.
- Updated results JSON structure to reflect new analysis outputs and method performance metrics.
Author: catlog22
Date: 2026-01-02 12:32:43 +08:00
Parent: 9129c981a4
Commit: 56c03c847a
4 changed files with 1256 additions and 0 deletions


@@ -1033,6 +1033,28 @@ class VectorStore:
            row = conn.execute("SELECT COUNT(*) FROM semantic_chunks").fetchone()
            return row[0] if row else 0

    def get_all_chunks(self) -> List[SemanticChunk]:
        """Get all chunks from the store.

        Returns:
            List of SemanticChunk objects with id and content.
        """
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(
                "SELECT id, file_path, content, metadata FROM semantic_chunks"
            ).fetchall()

        chunks = []
        for row in rows:
            chunks.append(SemanticChunk(
                id=row["id"],
                content=row["content"],
                file_path=row["file_path"],
                metadata=json.loads(row["metadata"]) if row["metadata"] else None,
            ))
        return chunks

    def clear_cache(self) -> None:
        """Manually clear the embedding cache."""
        self._invalidate_cache()
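For context, a hypothetical sketch of how a benchmark script such as analyze_methods.py might consume the new method. The import path and the VectorStore constructor arguments are assumptions; only get_all_chunks comes from this diff.

# Hypothetical usage sketch; import path and constructor signature are assumed.
from vector_store import VectorStore  # assumed module path

store = VectorStore(db_path="index.db")  # assumed constructor arguments
chunks = store.get_all_chunks()          # method added in this commit
print(f"Loaded {len(chunks)} chunks")

# Build an in-memory corpus keyed by chunk id for downstream search experiments.
corpus = {chunk.id: chunk.content for chunk in chunks}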